Spaces:
Sleeping
Sleeping
Stefano Fiorucci
committed on
Commit
•
829f2a2
1
Parent(s):
82fe524
fix and rename notebooks
Browse files
notebooks/index_creation.ipynb
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"cells":[{"cell_type":"markdown","id":"3922a573","metadata":{},"source":["# Index creation"]},{"cell_type":"markdown","id":"viixGIJcKPSQ","metadata":{"id":"viixGIJcKPSQ"},"source":["## Preliminary operations"]},{"cell_type":"code","execution_count":1,"id":"MevE4jEZ5QBT","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":25189,"status":"ok","timestamp":1652189481823,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"MevE4jEZ5QBT","outputId":"d4b2a927-e000-442b-ebc6-0d40d8a165d6"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"id":"VYWRJ-Lf55nV","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":161669,"status":"ok","timestamp":1652189651623,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"VYWRJ-Lf55nV","outputId":"5c860ef6-d4cb-4293-d704-51454a3f88bf"},"outputs":[],"source":["# install dependencies\n","! 
pip install farm-haystack[faiss-gpu]"]},{"cell_type":"markdown","id":"QVDuHAMIK4bg","metadata":{"id":"QVDuHAMIK4bg"},"source":["## Load data"]},{"cell_type":"code","execution_count":3,"id":"72139774","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:46.176031Z","iopub.status.busy":"2022-01-09T08:40:46.175755Z","iopub.status.idle":"2022-01-09T08:40:46.179554Z","shell.execute_reply":"2022-01-09T08:40:46.178704Z","shell.execute_reply.started":"2022-01-09T08:40:46.175959Z"},"executionInfo":{"elapsed":32,"status":"ok","timestamp":1652189651625,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"72139774"},"outputs":[],"source":["import glob\n","import json"]},{"cell_type":"code","execution_count":4,"id":"4421e328","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:47.846999Z","iopub.status.busy":"2022-01-09T08:40:47.846757Z","iopub.status.idle":"2022-01-09T08:40:48.327632Z","shell.execute_reply":"2022-01-09T08:40:48.326829Z","shell.execute_reply.started":"2022-01-09T08:40:47.846975Z"},"executionInfo":{"elapsed":24363,"status":"ok","timestamp":1652189675961,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"4421e328"},"outputs":[],"source":["DATA_DIRECTORY = '/content/drive/MyDrive/Colab Notebooks/wklp/data'\n","\n","docs=[]\n","\n","for json_file in glob.glob(f'{DATA_DIRECTORY}/*.json'):\n"," with open(json_file, 'r') as fin:\n"," json_content=json.load(fin)\n"," \n"," doc={'content': json_content['text'],\n"," 'meta': {'name': json_content['name'],\n"," 'url': json_content['url']}}\n"," docs.append(doc)"]},{"cell_type":"code","execution_count":5,"id":"GR6qWQAn72WG","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":9,"status":"ok","timestamp":1652189679928,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"GR6qWQAn72WG","outputId":"3e17336f-1145-43ff-c3ca-fab7604343d1"},"outputs":[{"data":{"text/plain":["1087"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["len(docs)"]},{"cell_type":"code","execution_count":6,"id":"aa231b94","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:40:48.796741Z","iopub.status.busy":"2022-01-09T08:40:48.796550Z","iopub.status.idle":"2022-01-09T08:40:48.805224Z","shell.execute_reply":"2022-01-09T08:40:48.804705Z","shell.execute_reply.started":"2022-01-09T08:40:48.796722Z"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1652189681394,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"aa231b94","outputId":"a42147fb-b9a4-4500-cc96-ce73177030f9"},"outputs":[{"data":{"text/plain":["{'content': \"Pete Lindstrom\\nPete Lindstrom was a citizen of Twin Peaks, Washington who was killed in the Blizzard of 1889.\\nHis death was witnessed by Knut Zimmerman, who reported that wind had plunged a candle from the Annual Candlelighting and Christmas Tree Ceremony into the back of Lindstrom's head, killing him.\",\n"," 'meta': {'name': 'Pete_Lindstrom',\n"," 'url': 'https://twinpeaks.fandom.com/wiki/Pete_Lindstrom'}}"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["docs[5]"]},{"cell_type":"markdown","id":"Yu3bAUPoLrPI","metadata":{"id":"Yu3bAUPoLrPI"},"source":["## Define document store ([FAISS](https://github.com/facebookresearch/faiss)) and write 
documents\n","\n"]},{"cell_type":"code","execution_count":8,"id":"bfe846df","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:40:59.678181Z","iopub.status.busy":"2022-01-09T08:40:59.678003Z","iopub.status.idle":"2022-01-09T08:40:59.753228Z","shell.execute_reply":"2022-01-09T08:40:59.752500Z","shell.execute_reply.started":"2022-01-09T08:40:59.678161Z"},"executionInfo":{"elapsed":10410,"status":"ok","timestamp":1652190218453,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"bfe846df","outputId":"187c2d40-470a-4f87-ab50-ec4082bccb33"},"outputs":[{"name":"stderr","output_type":"stream","text":["INFO - haystack.modeling.model.optimization - apex not found, won't use it. See https://nvidia.github.io/apex/\n","ERROR - root - Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.\n","INFO - haystack.telemetry - Haystack sends anonymous usage data to understand the actual usage and steer dev efforts towards features that are most meaningful to users. You can opt-out at anytime by calling disable_telemetry() or by manually setting the environment variable HAYSTACK_TELEMETRY_ENABLED as described for different operating systems on the documentation page. 
More information at https://haystack.deepset.ai/guides/telemetry\n"]}],"source":["from haystack.document_stores import FAISSDocumentStore\n","\n","# the document store settings are those compatible with Embedding Retriever\n","document_store = FAISSDocumentStore(\n"," similarity=\"dot_product\",\n"," embedding_dim=768)"]},{"cell_type":"code","execution_count":9,"id":"bc5adb1c","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:41:04.538529Z","iopub.status.busy":"2022-01-09T08:41:04.538227Z","iopub.status.idle":"2022-01-09T08:41:05.147190Z","shell.execute_reply":"2022-01-09T08:41:05.146513Z","shell.execute_reply.started":"2022-01-09T08:41:04.538503Z"},"executionInfo":{"elapsed":2085,"status":"ok","timestamp":1652190317389,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"bc5adb1c","outputId":"4cc11a2d-5ce5-41c1-e5eb-a0ee411ab00b"},"outputs":[{"name":"stdout","output_type":"stream","text":["[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Unzipping tokenizers/punkt.zip.\n"]},{"name":"stderr","output_type":"stream","text":[" 0%| | 0/1087 [00:00<?, ?docs/s]WARNING - haystack.nodes.preprocessor.preprocessor - One or more sentence found with word count higher than the split length.\n","100%|ββββββββββ| 1087/1087 [00:01<00:00, 980.44docs/s]\n"]}],"source":["# preprocess documents, splitting by chunks of 200 words\n","\n","from haystack.nodes import PreProcessor\n","\n","processor = PreProcessor(\n"," clean_empty_lines=True,\n"," clean_whitespace=True,\n"," clean_header_footer=True,\n"," split_by=\"word\",\n"," split_length=200,\n"," split_respect_sentence_boundary=True,\n"," split_overlap=0,\n"," language ='en'\n",")\n","preprocessed_docs = 
processor.process(docs)"]},{"cell_type":"code","execution_count":11,"id":"41986306","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:41:07.414905Z","iopub.status.busy":"2022-01-09T08:41:07.414681Z","iopub.status.idle":"2022-01-09T08:41:07.418856Z","shell.execute_reply":"2022-01-09T08:41:07.418094Z","shell.execute_reply.started":"2022-01-09T08:41:07.414884Z"},"executionInfo":{"elapsed":370,"status":"ok","timestamp":1652190335845,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"41986306","outputId":"04292f05-fd20-432f-febc-8b91ce3d47c4"},"outputs":[{"name":"stdout","output_type":"stream","text":["<Document: id=3f6b71a59e1226326e53871d05393810, content='Pete Lindstrom\n","Pete Lindstrom was a citizen of Twin Peaks, Washington who was killed in the Blizzard ...'>\n"]}],"source":["print(preprocessed_docs[5])\n"]},{"cell_type":"code","execution_count":12,"id":"b9PS0PkM_1EF","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":370,"status":"ok","timestamp":1652190343399,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"b9PS0PkM_1EF","outputId":"25fba54f-46d9-4c53-b0c1-15e8a878cad0"},"outputs":[{"data":{"text/plain":["2825"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["len(preprocessed_docs)"]},{"cell_type":"code","execution_count":81,"id":"191144b4","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["425730d860514e2d87c0870cbb943842","06c58f8fc29343fa96e36d5b1f8dd078","046fa73af99645cc88b49c0f3e5f96b7","e256a26a0f41436a9755c56f3ffebd11","1e2bf8bf2ab14c9e880c06b04f752a1b","1377c76f1051467fb391c2c0119b0634","4d4babe9fcb24dd7996ecbeb7006018f","ff4bc8be1b8041e6a116bc37e366bf96","e004a6c61f2d4e1d8e9d02c51dcc6ebd","88c675dce7bd4247842ffeb6470d31dd","1d447ec86fe84008b29495ecb78a7fac"]},"execution":{"iopub.execute_input":"2022-01-09T08:41:10.695292Z","iopub.status.busy":"2022-01-09T08:41:10.695064Z","iopub.status.idle":"2022-01-09T08:41:22.144864Z","shell.execute_reply":"2022-01-09T08:41:22.144203Z","shell.execute_reply.started":"2022-01-09T08:41:10.695271Z"},"executionInfo":{"elapsed":11491,"status":"ok","timestamp":1652179167100,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"191144b4","outputId":"c30f2216-2c6c-4f28-867c-dfc0bd76bc09"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"425730d860514e2d87c0870cbb943842","version_major":2,"version_minor":0},"text/plain":["Writing Documents: 0%| | 0/2825 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"}],"source":["# write documents\n","document_store.write_documents(preprocessed_docs)\n"]},{"cell_type":"markdown","id":"O7Eo9F7yf1N_","metadata":{"id":"O7Eo9F7yf1N_"},"source":["## Define retriever (Embedding Retriever) and generate document 
embeddings\n"]},{"cell_type":"code","execution_count":82,"id":"7993e609","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":188,"referenced_widgets":["ab5054496cae4e56b8f884db8cfa1cf7","61f277dcf14c4cc692c1cf6dd7c5a846","c7a72de53d104ff2b470ffe9a24b5a05","0f2a6092eb35478693982c6ba694eedf","abe2fe0c05634127bc61ddae4ecbefe9","617fefdfbf594f9d84b64528d58e391e","364f355213fd49e89373c5cc2bbbd646","99cd62ad76d740d197ca16db71359c9f","161afc4e516a4436a7edd60c8fe12dbf","d03003493ce243d38512f5a3990a80f7","ae739e7eca68419ca55f741ee17e325c","bb09ce6273944cd9be20a5d4730acfe5","ddd00b44cb994eaca361ee9d182854f5","8fc242cfcf074a0dbdd852a2d65d3c43","8f74df40a42443e1beda8e8f25d33c4d","902784ed90204018afb1050e58ab5785","e5586e38136f4bedb7f2c12e7d7993ee","02e6b8d39ac1478e8b831690d542937b","f8cd3a71bd724590bb22f01100931b30","04989dd1884b48c795cf59aa33686866","2e4df25efaa64b95acb29e7bce65e4c0","520ae85fb0804dafa7c6a56a81b80769"]},"execution":{"iopub.execute_input":"2022-01-09T08:41:22.146473Z","iopub.status.busy":"2022-01-09T08:41:22.146213Z","iopub.status.idle":"2022-01-09T08:41:30.833036Z","shell.execute_reply":"2022-01-09T08:41:30.832333Z","shell.execute_reply.started":"2022-01-09T08:41:22.146441Z"},"executionInfo":{"elapsed":95127,"status":"ok","timestamp":1652179262209,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"7993e609","outputId":"f31a14d8-281c-46b0-f882-22f7eb98d338"},"outputs":[{"name":"stderr","output_type":"stream","text":["INFO - haystack.modeling.utils - Using devices: CUDA:0\n","INFO - haystack.modeling.utils - Number of GPUs: 1\n","INFO - haystack.nodes.retriever.dense - Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1\n","WARNING - haystack.nodes.retriever._embedding_encoder - You are using a Sentence Transformer with the dot_product function. We recommend using cosine instead. 
This can be set when initializing the DocumentStore\n","INFO - haystack.document_stores.faiss - Updating embeddings for 2811 docs...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"ab5054496cae4e56b8f884db8cfa1cf7","version_major":2,"version_minor":0},"text/plain":["Updating Embedding: 0%| | 0/2811 [00:00<?, ? docs/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"bb09ce6273944cd9be20a5d4730acfe5","version_major":2,"version_minor":0},"text/plain":["Batches: 0%| | 0/88 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"}],"source":["from haystack.nodes import EmbeddingRetriever\n","\n","retriever = EmbeddingRetriever(\n"," document_store=document_store,\n"," embedding_model=\"sentence-transformers/multi-qa-mpnet-base-dot-v1\",\n"," model_format=\"sentence_transformers\"\n",")\n","document_store.update_embeddings(retriever)"]},{"cell_type":"markdown","id":"9QhguDpYf_5u","metadata":{"id":"9QhguDpYf_5u"},"source":["## Save and export index\n"]},{"cell_type":"code","execution_count":102,"id":"jLKDYZ1tnNZo","metadata":{"executionInfo":{"elapsed":195,"status":"ok","timestamp":1652180221595,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"jLKDYZ1tnNZo"},"outputs":[],"source":["import shutil\n","import glob"]},{"cell_type":"code","execution_count":100,"id":"7DVPCyzAhPEA","metadata":{"executionInfo":{"elapsed":174,"status":"ok","timestamp":1652180142621,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"7DVPCyzAhPEA"},"outputs":[],"source":["document_store.save(\"my_faiss_index.faiss\")"]},{"cell_type":"code","execution_count":null,"id":"8fYMVd_ggJnw","metadata":{"id":"8fYMVd_ggJnw"},"outputs":[],"source":["OUT_DIR = '/content/drive/MyDrive/Colab 
Notebooks/wklp/'"]},{"cell_type":"code","execution_count":105,"id":"F1GnHXZ4nPJI","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":508,"status":"ok","timestamp":1652180314288,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"F1GnHXZ4nPJI","outputId":"31b385af-af6b-4ad2-dc1a-c48dd6956990"},"outputs":[{"name":"stdout","output_type":"stream","text":["my_faiss_index.faiss\n","my_faiss_index.json\n","faiss_document_store.db\n","faiss_document_store.db\n"]}],"source":["for f in glob.glob('*faiss*.*')+glob.glob('faiss*.*'):\n"," print(f)\n"," shutil.copy(f, OUT_DIR)"]},{"cell_type":"markdown","id":"9x7Bo95fgTkm","metadata":{"id":"9x7Bo95fgTkm"},"source":["## Define reader"]},{"cell_type":"code","execution_count":52,"id":"9oJ3b3ukcT10","metadata":{"executionInfo":{"elapsed":204,"status":"ok","timestamp":1652177889794,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"9oJ3b3ukcT10"},"outputs":[],"source":["from haystack.nodes import FARMReader\n"]},{"cell_type":"code","execution_count":88,"id":"f5299f38","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:42:49.473881Z","iopub.status.busy":"2022-01-09T08:42:49.473628Z","iopub.status.idle":"2022-01-09T08:42:58.250644Z","shell.execute_reply":"2022-01-09T08:42:58.249808Z","shell.execute_reply.started":"2022-01-09T08:42:49.473851Z"},"executionInfo":{"elapsed":9316,"status":"ok","timestamp":1652179820100,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"f5299f38","outputId":"0209e180-0307-4f7f-cfad-27866307cf4a"},"outputs":[{"name":"stderr","output_type":"stream","text":["INFO - haystack.modeling.utils - Using devices: CUDA:0\n","INFO - haystack.modeling.utils - Number of GPUs: 1\n","INFO - haystack.modeling.model.language_model - LOADING MODEL\n","INFO - haystack.modeling.model.language_model - 
=============\n","INFO - haystack.modeling.model.language_model - Could not find deepset/roberta-base-squad2-distilled locally.\n","INFO - haystack.modeling.model.language_model - Looking on Transformers Model Hub (in local cache and online)...\n","INFO - haystack.modeling.model.language_model - Loaded deepset/roberta-base-squad2-distilled\n","INFO - haystack.modeling.utils - Using devices: CUDA\n","INFO - haystack.modeling.utils - Number of GPUs: 1\n","INFO - haystack.modeling.infer - Got ya 2 parallel workers to do inference ...\n","INFO - haystack.modeling.infer - 0 0 \n","INFO - haystack.modeling.infer - /w\\ /w\\ \n","INFO - haystack.modeling.infer - /'\\ / \\ \n"]}],"source":["reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2-distilled\", use_gpu=True)"]},{"cell_type":"markdown","id":"tRgVAepagXo1","metadata":{"id":"tRgVAepagXo1"},"source":["## Define and try pipeline (retriever + reader)"]},{"cell_type":"code","execution_count":89,"id":"a2226345","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.252390Z","iopub.status.busy":"2022-01-09T08:42:58.252188Z","iopub.status.idle":"2022-01-09T08:42:58.256054Z","shell.execute_reply":"2022-01-09T08:42:58.255544Z","shell.execute_reply.started":"2022-01-09T08:42:58.252363Z"},"executionInfo":{"elapsed":9,"status":"ok","timestamp":1652179820101,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"a2226345"},"outputs":[],"source":["from haystack.pipelines import ExtractiveQAPipeline\n"]},{"cell_type":"code","execution_count":90,"id":"0fc15887","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.257027Z","iopub.status.busy":"2022-01-09T08:42:58.256867Z","iopub.status.idle":"2022-01-09T08:42:58.261446Z","shell.execute_reply":"2022-01-09T08:42:58.260756Z","shell.execute_reply.started":"2022-01-09T08:42:58.257009Z"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1652179820102,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"0fc15887"},"outputs":[],"source":["pipe = ExtractiveQAPipeline(reader, retriever)\n"]},{"cell_type":"code","execution_count":91,"id":"24fd8084","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.262798Z","iopub.status.busy":"2022-01-09T08:42:58.262569Z","iopub.status.idle":"2022-01-09T08:42:58.267587Z","shell.execute_reply":"2022-01-09T08:42:58.267054Z","shell.execute_reply.started":"2022-01-09T08:42:58.262772Z"},"executionInfo":{"elapsed":349,"status":"ok","timestamp":1652179820444,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"24fd8084"},"outputs":[],"source":["import time\n","from haystack.utils import print_answers"]},{"cell_type":"code","execution_count":99,"id":"e8bae423","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":920,"referenced_widgets":["1158604d42434203bc096856ed22bab8","c21f9accc8854d5fa3b60e4cd4891923","e13a9fe3eba54b7e8b5efe219f515f46","d0921da424dd4f68bcf85099820d17ba","d64dc173913146f0a6a413b158177ba5","f620a1ca0d47465ba079ecc3518b7abe","a44f766857084ecc95f9ea1ab879085e","f3e510c515b2456a8025c4b2e71f0463","4904f0e333824ff39f40807d415b9af0","898ea2f834cb444f920eeae85f941ebf","82055fffe4bc447492b9ab67d0d04ad2"]},"execution":{"iopub.execute_input":"2022-01-09T08:56:33.759683Z","iopub.status.busy":"2022-01-09T08:56:33.759457Z","iopub.status.idle":"2022-01-09T08:56:34.894724Z","shell.execute_reply":"2022-01-09T08:56:34.894183Z","shell.execute_reply.started":"2022-01-09T08:56:33.759662Z"},"executionInfo":{"elapsed":2574,"status":"ok","timestamp":1652180092923,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"e8bae423","outputId":"82f874ca-77df-4933-c9f1-ce55a8065ece"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"1158604d42434203bc096856ed22bab8","version_major":2,"version_minor":0},"text/plain":["Batches: 0%| | 0/1 [00:00<?, 
?it/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Inferencing Samples: 0%| | 0/1 [00:00<?, ? Batches/s]/usr/local/lib/python3.7/dist-packages/haystack/modeling/model/prediction_head.py:483: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n"," start_indices = flat_sorted_indices // max_seq_len\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 5.75 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 5.42 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 7.92 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 12.85 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 13.09 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 6.00 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 12.42 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 19.19 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 20.71 Batches/s]\n","Inferencing Samples: 100%|ββββββββββ| 1/1 [00:00<00:00, 13.32 Batches/s]"]},{"name":"stdout","output_type":"stream","text":["\n","2.474968910217285\n","\n","Query: Where is Twin Peaks\n","Answers:\n","[ { 'answer': 'Washington',\n"," 'context': 'Highway J\\n'\n"," 'Highway J was a highway that ran through Twin Peaks, '\n"," 'Washington. Notable buildings\\n'\n"," \"Gentleman Jim's\\n\"\n"," \"Horne's Department Store\\n\"\n"," 'Pine View Motel ',\n"," 'score': 0.9937074482440948},\n"," { 'answer': 'Washington',\n"," 'context': 'Chapel-in-the-Woods\\n'\n"," 'Chapel-in-the-Woods was a chapel in Twin Peaks, '\n"," 'Washington. 
Hank Jennings and Norma Jennings as well as Ed '\n"," 'Hurley and Nadine Hurle',\n"," 'score': 0.9566615521907806},\n"," { 'answer': 'northeastern Washington State',\n"," 'context': 'eriff Harry S. Truman\\n'\n"," 'Twin Peaks was a small logging town in northeastern '\n"," 'Washington State, five miles south of the Canadian border '\n"," 'and twelve miles w',\n"," 'score': 0.9068273603916168},\n"," { 'answer': 'along the shores of Black Lake',\n"," 'context': 'od National Forest. By 1888, the town of Twin Peaks '\n"," 'existed along the shores of Black Lake and was settled by '\n"," 'refugees, trappers, and thieves. The chi',\n"," 'score': 0.4931739866733551},\n"," { 'answer': 'Twin Peaks, Washington',\n"," 'context': 'ation Guide\\n'\n"," 'For descriptions of locations seen in the show, see Twin '\n"," 'Peaks, Washington\\n'\n"," '\"Location Guide\" is a featurette originally released in '\n"," 'the 200',\n"," 'score': 0.49235279858112335}]\n"]},{"name":"stderr","output_type":"stream","text":["\n"]}],"source":["start_time=time.time()\n","\n","prediction = pipe.run(\n"," query=\"Where is Twin Peaks\", params={\"Retriever\": {\"top_k\": 10}, \"Reader\": {\"top_k\": 5}}\n",")\n","\n","end_time=time.time()\n","\n","print()\n","print(end_time - start_time)\n","print_answers(prediction, details=\"medium\")\n"]},{"cell_type":"code","execution_count":71,"id":"be150456","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"execution":{"iopub.execute_input":"2022-01-09T08:56:36.820622Z","iopub.status.busy":"2022-01-09T08:56:36.820402Z","iopub.status.idle":"2022-01-09T08:56:36.830219Z","shell.execute_reply":"2022-01-09T08:56:36.826251Z","shell.execute_reply.started":"2022-01-09T08:56:36.820601Z"},"executionInfo":{"elapsed":22,"status":"ok","timestamp":1652178864337,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"},"user_tz":-120},"id":"be150456","outputId":"89216adc-4242-49cc-e3d8-efd52aa6d608"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Query: Who killed Laura Palmer?\n","Answers:\n","[ { 'answer': 'Leland',\n"," 'context': '\" he remembered the name Laura had whispered into his ear '\n"," 'in his dream.\\n'\n"," ' Leland was taken back to the station and while under '\n"," 'control of BOB, he confe',\n"," 'score': 0.8553578555583954},\n"," { 'answer': 'Benjamin Horne',\n"," 'context': 'urdering Maddy just before she intended to go home.\\n'\n"," ' Two days later, Benjamin Horne had been arrested by the '\n"," \"sheriff's department, with Sheriff Truman\",\n"," 'score': 0.7564241290092468},\n"," { 'answer': 'Sarah',\n"," 'context': \"Laura's murder\\n\"\n"," ' Sarah stood in her kitchen the next morning, February 24, '\n"," '1989.\\n'\n"," ' She impatiently called for her daughter to wake up, but '\n"," 'received no a',\n"," 'score': 0.2567792162299156},\n"," { 'answer': 'Sarah',\n"," 'context': 'here Dale Cooper said she had once lived, Carrie Page '\n"," 'heard the sounds of Sarah calling Laura downstairs the '\n"," 'morning her body was discovered, and bega',\n"," 'score': 0.10802637040615082},\n"," { 'answer': '\"Sheriff Truman',\n"," 'context': 'ura would have left a note if she was with Bobby.\\n'\n"," ' Leland then said, \"Sheriff Truman,\" leading her to worry '\n"," 'even more.\\n'\n"," \" Sarah's suspicion was confirme\",\n"," 'score': 0.016497892793267965}]\n"]}],"source":["print_answers(prediction, details=\"medium\")\n"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":["viixGIJcKPSQ"],"name":"wklp_embeddingretriever.ipynb","provenance":[],"toc_visible":true},"kernelspec":{"display_name":"saturn (Python 
3)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.5"},"widgets":{"application/vnd.jupyter.widget-state+json":{"02e6b8d39ac1478e8b831690d542937b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"046fa73af99645cc88b49c0f3e5f96b7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff4bc8be1b8041e6a116bc37e366bf96","max":2825,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e004a6c61f2d4e1d8e9d02c51dcc6ebd","value":2825}},"04989dd1884b48c795cf59aa33686866":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"06c58f8fc29343fa96e36d5b1f8dd078":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_versio
n":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1377c76f1051467fb391c2c0119b0634","placeholder":"β","style":"IPY_MODEL_4d4babe9fcb24dd7996ecbeb7006018f","value":"Writing Documents: "}},"0f2a6092eb35478693982c6ba694eedf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d03003493ce243d38512f5a3990a80f7","placeholder":"β","style":"IPY_MODEL_ae739e7eca68419ca55f741ee17e325c","value":" 10000/? [01:29<00:00, 111.81 docs/s]"}},"1158604d42434203bc096856ed22bab8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c21f9accc8854d5fa3b60e4cd4891923","IPY_MODEL_e13a9fe3eba54b7e8b5efe219f515f46","IPY_MODEL_d0921da424dd4f68bcf85099820d17ba"],"layout":"IPY_MODEL_d64dc173913146f0a6a413b158177ba5"}},"1377c76f1051467fb391c2c0119b0634":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"d
isplay":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"161afc4e516a4436a7edd60c8fe12dbf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"1d447ec86fe84008b29495ecb78a7fac":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1e2bf8bf2ab14c9e880c06b04f752a1b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid
_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2e4df25efaa64b95acb29e7bce65e4c0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"364f355213fd49e89373c5cc2bbbd646":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"425730d860514e2d87c0870cbb943842":{"model_module":"@jupyter-widgets/controls","model_module_
version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_06c58f8fc29343fa96e36d5b1f8dd078","IPY_MODEL_046fa73af99645cc88b49c0f3e5f96b7","IPY_MODEL_e256a26a0f41436a9755c56f3ffebd11"],"layout":"IPY_MODEL_1e2bf8bf2ab14c9e880c06b04f752a1b"}},"4904f0e333824ff39f40807d415b9af0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4d4babe9fcb24dd7996ecbeb7006018f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"520ae85fb0804dafa7c6a56a81b80769":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"617fefdfbf594f9d84b64528d58e391e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count"
:null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"61f277dcf14c4cc692c1cf6dd7c5a846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_617fefdfbf594f9d84b64528d58e391e","placeholder":"β","style":"IPY_MODEL_364f355213fd49e89373c5cc2bbbd646","value":"Documents Processed: 
"}},"82055fffe4bc447492b9ab67d0d04ad2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"88c675dce7bd4247842ffeb6470d31dd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"898ea2f834cb444f920eeae85f941ebf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":nul
l,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8f74df40a42443e1beda8e8f25d33c4d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e4df25efaa64b95acb29e7bce65e4c0","placeholder":"β","style":"IPY_MODEL_520ae85fb0804dafa7c6a56a81b80769","value":" 88/88 [01:28<00:00, 
5.40it/s]"}},"8fc242cfcf074a0dbdd852a2d65d3c43":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8cd3a71bd724590bb22f01100931b30","max":88,"min":0,"orientation":"horizontal","style":"IPY_MODEL_04989dd1884b48c795cf59aa33686866","value":88}},"902784ed90204018afb1050e58ab5785":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"99cd62ad76d740d197ca16db71359c9f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_ve
rsion":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a44f766857084ecc95f9ea1ab879085e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ab5054496cae4e56b8f884db8cfa1cf7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_61f277dcf14c4cc692c1cf6dd7c5a846","IPY_MODEL_c7a72de53d104ff2b470ffe9a24b5a05","IPY_MODEL_0f2a6092eb35478693982c6ba694eedf"],"layout":"IPY_MODEL_abe2fe0c05634127bc61ddae4ecbefe9"}},"abe2fe0c05634127bc61ddae4ecbefe9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_m
odule":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ae739e7eca68419ca55f741ee17e325c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bb09ce6273944cd9be20a5d4730acfe5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ddd00b44cb994eaca361ee9d182854f5","IPY_MODEL_8fc242cfcf074a0dbdd852a2d65d3c43","IPY_MODEL_8f74df40a42443e1beda8e8f25d33c4d"],"layout":"IPY_MODEL_902784ed90204018afb1050e58ab5785"}},"c21f9accc8854d5fa3b60e4cd4891923":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_v
ersion":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f620a1ca0d47465ba079ecc3518b7abe","placeholder":"β","style":"IPY_MODEL_a44f766857084ecc95f9ea1ab879085e","value":"Batches: 100%"}},"c7a72de53d104ff2b470ffe9a24b5a05":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_99cd62ad76d740d197ca16db71359c9f","max":2811,"min":0,"orientation":"horizontal","style":"IPY_MODEL_161afc4e516a4436a7edd60c8fe12dbf","value":2811}},"d03003493ce243d38512f5a3990a80f7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"vis
ibility":null,"width":null}},"d0921da424dd4f68bcf85099820d17ba":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_898ea2f834cb444f920eeae85f941ebf","placeholder":"β","style":"IPY_MODEL_82055fffe4bc447492b9ab67d0d04ad2","value":" 1/1 [00:00<00:00, 9.08it/s]"}},"d64dc173913146f0a6a413b158177ba5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ddd00b44cb994eaca361ee9d182854f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_ve
rsion":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e5586e38136f4bedb7f2c12e7d7993ee","placeholder":"β","style":"IPY_MODEL_02e6b8d39ac1478e8b831690d542937b","value":"Batches: 100%"}},"e004a6c61f2d4e1d8e9d02c51dcc6ebd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e13a9fe3eba54b7e8b5efe219f515f46":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f3e510c515b2456a8025c4b2e71f0463","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_4904f0e333824ff39f40807d415b9af0","value":1}},"e256a26a0f41436a9755c56f3ffebd11":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_88c675dce7bd4247842ffeb6470d31dd","placeholder":"β","style":"IPY_MODEL_1d447ec86fe84008b29495ecb78a7fac","value":" 10000/? 
[00:11<00:00, 898.29it/s]"}},"e5586e38136f4bedb7f2c12e7d7993ee":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f3e510c515b2456a8025c4b2e71f0463":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"ov
erflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f620a1ca0d47465ba079ecc3518b7abe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f8cd3a71bd724590bb22f01100931b30":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":nu
ll,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff4bc8be1b8041e6a116bc37e366bf96":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":5}
|
|
|
|
notebooks/indexing_and_pipeline_creation.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/question_generation.ipynb
CHANGED
@@ -2,31 +2,36 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "markdown",
|
5 |
-
"
|
6 |
-
"# Question generation\n",
|
7 |
-
"Inspired by [this Haystack tutorial](https://haystack.deepset.ai/tutorials/question-generation)"
|
8 |
-
],
|
9 |
"metadata": {
|
10 |
"id": "iiNbGRn-KitL"
|
11 |
},
|
12 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "markdown",
|
16 |
-
"
|
17 |
-
"## Preliminary operations"
|
18 |
-
],
|
19 |
"metadata": {
|
20 |
"id": "viixGIJcKPSQ"
|
21 |
},
|
22 |
-
"
|
|
|
|
|
23 |
},
|
24 |
{
|
25 |
"cell_type": "code",
|
26 |
-
"
|
27 |
-
|
28 |
-
"drive.mount('/content/drive')"
|
29 |
-
],
|
30 |
"metadata": {
|
31 |
"colab": {
|
32 |
"base_uri": "https://localhost:8080/"
|
@@ -34,40 +39,42 @@
|
|
34 |
"id": "MevE4jEZ5QBT",
|
35 |
"outputId": "136106e4-40c9-4443-ee84-784fb922e188"
|
36 |
},
|
37 |
-
"id": "MevE4jEZ5QBT",
|
38 |
-
"execution_count": 1,
|
39 |
"outputs": [
|
40 |
{
|
41 |
-
"output_type": "stream",
|
42 |
"name": "stdout",
|
|
|
43 |
"text": [
|
44 |
"Mounted at /content/drive\n"
|
45 |
]
|
46 |
}
|
|
|
|
|
|
|
|
|
47 |
]
|
48 |
},
|
49 |
{
|
50 |
"cell_type": "code",
|
51 |
-
"
|
52 |
-
|
53 |
-
"! pip install farm-haystack[faiss-gpu]"
|
54 |
-
],
|
55 |
"metadata": {
|
56 |
"id": "VYWRJ-Lf55nV"
|
57 |
},
|
58 |
-
"
|
59 |
-
"
|
60 |
-
|
|
|
|
|
61 |
},
|
62 |
{
|
63 |
"cell_type": "markdown",
|
64 |
-
"
|
65 |
-
"## Load data"
|
66 |
-
],
|
67 |
"metadata": {
|
68 |
"id": "QVDuHAMIK4bg"
|
69 |
},
|
70 |
-
"
|
|
|
|
|
71 |
},
|
72 |
{
|
73 |
"cell_type": "code",
|
@@ -124,9 +131,8 @@
|
|
124 |
},
|
125 |
{
|
126 |
"cell_type": "code",
|
127 |
-
"
|
128 |
-
|
129 |
-
],
|
130 |
"metadata": {
|
131 |
"colab": {
|
132 |
"base_uri": "https://localhost:8080/"
|
@@ -134,19 +140,20 @@
|
|
134 |
"id": "GR6qWQAn72WG",
|
135 |
"outputId": "1198a602-7f4e-444a-f8f4-05b488663799"
|
136 |
},
|
137 |
-
"id": "GR6qWQAn72WG",
|
138 |
-
"execution_count": 8,
|
139 |
"outputs": [
|
140 |
{
|
141 |
-
"output_type": "execute_result",
|
142 |
"data": {
|
143 |
"text/plain": [
|
144 |
"134"
|
145 |
]
|
146 |
},
|
|
|
147 |
"metadata": {},
|
148 |
-
"
|
149 |
}
|
|
|
|
|
|
|
150 |
]
|
151 |
},
|
152 |
{
|
@@ -154,6 +161,9 @@
|
|
154 |
"execution_count": 9,
|
155 |
"id": "aa231b94",
|
156 |
"metadata": {
|
|
|
|
|
|
|
157 |
"execution": {
|
158 |
"iopub.execute_input": "2022-01-09T08:40:48.796741Z",
|
159 |
"iopub.status.busy": "2022-01-09T08:40:48.796550Z",
|
@@ -161,23 +171,20 @@
|
|
161 |
"shell.execute_reply": "2022-01-09T08:40:48.804705Z",
|
162 |
"shell.execute_reply.started": "2022-01-09T08:40:48.796722Z"
|
163 |
},
|
164 |
-
"colab": {
|
165 |
-
"base_uri": "https://localhost:8080/"
|
166 |
-
},
|
167 |
"id": "aa231b94",
|
168 |
"outputId": "3d88f0a8-635d-419c-8660-f6c77803e369"
|
169 |
},
|
170 |
"outputs": [
|
171 |
{
|
172 |
-
"output_type": "execute_result",
|
173 |
"data": {
|
174 |
"text/plain": [
|
175 |
"{'content': 'Part 5\\nNot to be confused with Episode 5.\\n\"Part 5\" is the fifth episode of the 2017 series of Twin Peaks and the thirty-fifth episode of the franchise as a whole. It aired on June 4, 2017.\\nPlot\\n\"Case files.\"\\n βDale Cooper\\nGene and Jake sit in a car, the former on the phone with Lorraine, reporting on the situation with Dougie Jones. Frustrated, she sends the message \"2\" (leaving 159 characters to type) to her contact \"ARGENT\" which causes a device in Buenos Aires to ring and flash twice with its two red lights.\\nConstance Talbot, Detective Macklay, and Detective Harrison observe the John Doe in the morgue. Talbot confirms the decapitation as the man\\'s cause of death and presents a ring found inside the body. On it is an inscription that reads, \"To Dougie, with love, Janey-E.\"\\nCooper\\'s doppelganger sits in his jail cell and correctly predicts that his food is coming. He takes his food and goes to the mirror, noting that BOB is still with him.\\nAt his place of employment, Mike Nelson calls in Steven Burnett, who has applied for a job. Mike tells him that his resume is inadequate and his forms were filled out incorrectly, then kicks him out.\\nSheriff Frank Truman talks to Harry on the phone and is informed by Lucy Brennan that his wife, Doris, is coming to him. Doris tells him about her frustrations, including a leaky pipe.\\nJaney-E, Sonny Jim and Cooper leave the Jones home and Janey-E tells Cooper that he had won $425,000. He looks at Sonny Jim and begins to shed tears. On realization that Dougie\\'s car is not there, Janey-E begrudgingly takes Cooper to Dougie\\'s work.\\nGene and Jake check on Dougie\\'s car again, which still has not moved. A group of delinquent youths also drive by the car.\\nJaney-E drops Cooper off at work and he wanders, following the aim of a statue of a man carrying a revolver. He stands around until Dougie\\'s co-worker, Phil Bisby comes carrying coffee on his way to a board meeting. 
Following him into the elevator, Cooper takes one of the coffees and begins drinking the \"damn good Joe.\" It was Frank\\'s one who then takes a green tea latte instead while Darren is turned down by Rhonda and Bonnici next to Frank is served the eighth cup carried by Phil.\\nAnthony Sinclair tells Cooper that he has covered for Dougie\\'s absence and during the meeting, as Sinclair presents a report. When saying that there was no arson with Littlefield, Cooper blurts out \"He\\'s lying,\" but does not elaborate, causing the boss Bushnell Mullins to have \"Dougie\" meet with him after the meeting. Mullins questions his accusation and gives him case files to assess by the next day.\\nRodney and Bradley Mitchum come to the Silver Mustang Casino and in front of Candie, Mandie and Sandie punish Burns for Cooper\\'s win at the casino and replace Burns with Warrick, who they tell to inform them if Cooper ever returns to the casino.\\nWhile his mother is passed out on drugs, the little boy living in the home across from Dougie\\'s car goes to examine it. He is shooed away by the gang of youths, arriving in a loud black 1970 Dodge Charger, who try to steal the car. The bomb under Dougie\\'s car explodes, killing several members of the gang, and the boy runs back to his home. Hearing him coming back in, his mother slowly wakes up and stares at the door.\\nAn auto detailer informs Jade that he found a set of keys for the Great Northern Hotel in her car. Since they have an address on them, she puts them inside a mailbox for delivery.\\nNorma sorts through documents as Heidi is serving and Becky delivers bread to Toad and gets money from Shelly. Norma goes to Shelly, urging her to help Becky rather than continue to enable her. 
Becky takes the money to Steven and they snort a drug.\\nCooper is pushed out of the elevator at the end of the workday and he goes to the statue he saw that morning.\\nAt the Twin Peaks sheriff\\'s station, Hawk and Andy continue to sort through files.\\nJacoby starts up his webcast he hosts as \"Dr. Amp\" and it is viewed by Jerry Horneβwho smokes a jointβand Nadine Hurley. His broadcast ends with an advertisement for his golden shovels that he urges his viewers to buy to shovel themselves \"out of the shit and into the truth.\"\\nAt the Pentagon, Lieutenant Cynthia Knox informs Colonel Davis that they have received a match on Major Garland Briggs\\' fingerprints β the sixteenth match in 25 years β in Buckhorn, South Dakota. Davis doubts the legitimacy of the match but says that if it is indeed truly Briggs that has been identified, that the FBI must be informed.\\nThe band Trouble plays at the Roadhouse as Richard Horne smokes underneath a \\'no smoking\\' sign. Employee Federico asks him to quit and the off-duty Deputy Chad Broxford takes over but ends up taking a bribe from Horne. Charlotte, from the next table over with Elizabeth, asks him for a light, but he grabs her and threatens to rape her.\\nAgent Preston examines Cooper\\'s file and compares his fingerprints from before his 1989 disappearance and from the doppelganger\\'s booking at the federal prison.\\nWarden Murphy gives the doppelganger his phone call. 
However, the doppelganger dials a number that sets off the prison\\'s alarms and he says \"The cow jumped over the moon,\" before hanging up, stopping the alarms.\\nIn Buenos Aires, the device contacted before by Lorraine rings and flashes twice with its two red lights and then shrinks to a kind of seed.\\nCooper continues to observe the statue.\\nCredits\\n\\nStarring\\nKyle MacLachlan as Dale Cooper / Dale Cooper (doppelganger)\\nIn Alphabetical Order\\nJane Adams as Constance Talbot\\nMΓ€dchen Amick as Shelly\\nTammie Baird as Lorraine\\nChrysta Bell as FBI Agent Tammy Preston\\nJim Belushi as Bradley Mitchum\\nSean Bolger as Detailer\\nBrent Briscoe as Detective Dave Macklay\\nWes Brown as Darren\\nJuan Carlos Cantu as Officer Reynaldo\\nVincent Castellanos as Federico\\nBailey Chase as Detective Don Harrison\\nCandy Clark as Doris Truman\\nGrace Victoria Cox as Charlotte\\nGiselle Damier as Sandie\\nDavid Dastmalchian as Pit Boss Warrick\\nJosh Fadem as Phil Bisby\\nEamon Farren as Richard Horne\\nRobert Forster as Sheriff Frank Truman\\nPierce Gagnon as Sonny Jim Jones\\nHailey Gates as Drugged-out Mother\\nBrett Gelman as Supervisor Burns\\nHarry Goaz as Deputy Andy Brennan\\nHank Harris as Prison Tech\\nAndrea Hays as Heidi\\nGary Hershberger as Mike Nelson\\nMichael Horse as Deputy Chief Tommy \"Hawk\" Hill\\nErnie Hudson as Colonel Davis\\nCaleb Landry Jones as Steven Burnett\\nDavid Patrick Kelly as Jerry Horne\\nRobert Knepper as Rodney Mitchum\\nAndrea Leal as Mandie\\nSheryl Lee as Laura Palmer\\nJane Levy as Elizabeth\\nPeggy Lipton as Norma Jennings\\nKarl Makinen as Inspector Randy Hollister\\nJames Morrison as Warden Dwight Murphy\\nDon Murray as Bushnell Mullins\\nJohn Pirruccello as Deputy Chad Broxford\\nAdele RenΓ© as Lieutenant Cynthia Cox\\nKimmy Robertson as Lucy Brennan\\nWendy Robie as Nadine Hurley\\nMarv Rosand as Toad\\nElena Satine as Rhonda\\nAmanda Seyfried as Rebecca (Becky) Burnett\\nAmie Shiels as Candie\\nSawyer Shipman as 
Little Boy\\nFrank Silva as Bob\\nTom Sizemore as Anthony Sinclair\\nBob Stephenson as Frank\\nRuss Tamblyn as Dr. Lawrence Jacoby\\nBill Tangradi as Jake\\nGreg Vrotsos as Gene\\nNaomi Watts as Janey-E Jones\\nNafessa Williams as Jade\\nBlake Zingale as Punk Leader\\nTrouble:\\nRiley Lynch\\nSam Smith\\nAlex Zhang Hungtai\\nDean Hurley\\nUncredited\\nTyler Malik as stand-in\\nKenneth Welsh as Windom Earle (archive footage)\\nUnknown performer as Bonnici\\nUnknown performer as Woman in elevator\\nUnknown performer as Man across Mullins\\nUnknown performer as Woman at meeting\\nUnknown performer as Mullins\\' secretary\\nProduction staff\\nSee: Twin Peaks (2017) Β§ Production staff\\nFeatured music\\n\"The Flame\"\\nWritten and performed by Johnny Jewel\\nCourtesy of Italians Do It Better\\n\"Frank 2000\"\\nWritten by Angelo Badalamenti and David Lynch\\nPerformed by Thought Gang\\n\"I Love How You Love Me\"\\nWritten by Barry Mann and Larry Kolber\\nPerformed by The Paris Sisters\\nPublished by Screen Gems-EMI Music Inc. (BMI)\\n\"I Am\"\\nWritten and performed by BluntedBeatz\\n\"Stars And Stripes Forever\"\\nWritten by John Philip Sousa\\nPerformed and arranged by the U.S. Army Band\\n\"Snake Eyes\"\\nWritten by Dean Hurley, Riley Lynch and Alex Zhang Hungtai\\nPerformed by Trouble\\n\"Habit\" and \"Tabloid\"\\nWritten and performed by Uniform\\nCourtesy of Sacred Bones Records\\n\"Windswept\"\\nWritten and performed by Johnny Jewel\\nCourtesy of Italians Do It Better\\nNotes\\nThis episode was dedicated to the memory of Marv Rosand.\\nAmy Shiels is credited as \"Amie\".\\nFrank, Dougie\\'s coworker who discovers he likes green tea lattes, is played by Bob Stephenson, who appeared in Episode 6 as the burger cook at the Double R Diner. 
This was Stephenson\\'s first acting gig.\\nUpon release, Twin Peaks: The Return earned some criticism for earning the \"Empty Cup Award,\" a satirical achievement for television series where actors handle coffee cups that are claimed to be full in the dialogue but are very clearly empty based on how they are handled by the performers. In the case of this episode, however, Kyle MacLachlan was given some praise for being the sole actor to handle his cup as though it were actually full, especially in an episode where a character (Phil Bisby) unrealistically balances two full trays of coffee while running around.\\nThe statue in front of Dougie\\'s workplace was not originally part of location and was brought by the production staff. It might be a statue of Donald Lynch, father of David Lynch, since according to the stand-in Tyler Malkin, Lynch talked to it saying \"Hi, Dad\".\\nThe numbers input by the doppelganger during his phone call are, using the standard DTMF tones pitched up 2 octaves for offscreen ones:\\n16 (pause) 1235789 (computer modem response) 3135378912315 (01189998819991197253 offscreen)\\nThis could be interpreted as two numbers dialing to get an outside line from the internal prison phone system, then a 7 digit local number calling a computer set up beforehand with a local number so it would be a free local call and finally a code that triggers a pre-planned, automated hack of the prison systems.',\n",
|
176 |
" 'meta': {'name': 'Part_5', 'url': 'https://twinpeaks.fandom.com/wiki/Part_5'}}"
|
177 |
]
|
178 |
},
|
|
|
179 |
"metadata": {},
|
180 |
-
"
|
181 |
}
|
182 |
],
|
183 |
"source": [
|
@@ -186,20 +193,23 @@
|
|
186 |
},
|
187 |
{
|
188 |
"cell_type": "markdown",
|
189 |
-
"
|
190 |
-
"## Define document store ([FAISS](https://github.com/facebookresearch/faiss)) and write documents\n",
|
191 |
-
"\n"
|
192 |
-
],
|
193 |
"metadata": {
|
194 |
"id": "Yu3bAUPoLrPI"
|
195 |
},
|
196 |
-
"
|
|
|
|
|
|
|
197 |
},
|
198 |
{
|
199 |
"cell_type": "code",
|
200 |
"execution_count": 10,
|
201 |
"id": "bfe846df",
|
202 |
"metadata": {
|
|
|
|
|
|
|
203 |
"execution": {
|
204 |
"iopub.execute_input": "2022-01-09T08:40:59.678181Z",
|
205 |
"iopub.status.busy": "2022-01-09T08:40:59.678003Z",
|
@@ -208,15 +218,12 @@
|
|
208 |
"shell.execute_reply.started": "2022-01-09T08:40:59.678161Z"
|
209 |
},
|
210 |
"id": "bfe846df",
|
211 |
-
"colab": {
|
212 |
-
"base_uri": "https://localhost:8080/"
|
213 |
-
},
|
214 |
"outputId": "be9c9ef8-bcc4-4c4a-e7a5-9003077a7ea3"
|
215 |
},
|
216 |
"outputs": [
|
217 |
{
|
218 |
-
"output_type": "stream",
|
219 |
"name": "stderr",
|
|
|
220 |
"text": [
|
221 |
"INFO - haystack.modeling.model.optimization - apex not found, won't use it. See https://nvidia.github.io/apex/\n",
|
222 |
"ERROR - root - Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.\n",
|
@@ -238,13 +245,6 @@
|
|
238 |
"execution_count": 11,
|
239 |
"id": "191144b4",
|
240 |
"metadata": {
|
241 |
-
"execution": {
|
242 |
-
"iopub.execute_input": "2022-01-09T08:41:10.695292Z",
|
243 |
-
"iopub.status.busy": "2022-01-09T08:41:10.695064Z",
|
244 |
-
"iopub.status.idle": "2022-01-09T08:41:22.144864Z",
|
245 |
-
"shell.execute_reply": "2022-01-09T08:41:22.144203Z",
|
246 |
-
"shell.execute_reply.started": "2022-01-09T08:41:10.695271Z"
|
247 |
-
},
|
248 |
"colab": {
|
249 |
"base_uri": "https://localhost:8080/",
|
250 |
"height": 49,
|
@@ -262,23 +262,30 @@
|
|
262 |
"3739785a410b4eadb0c36881e41b88ed"
|
263 |
]
|
264 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
"id": "191144b4",
|
266 |
"outputId": "f564e88e-be20-4f20-b4ed-4c663bfd71ac"
|
267 |
},
|
268 |
"outputs": [
|
269 |
{
|
270 |
-
"output_type": "display_data",
|
271 |
"data": {
|
272 |
-
"text/plain": [
|
273 |
-
"Writing Documents: 0%| | 0/134 [00:00<?, ?it/s]"
|
274 |
-
],
|
275 |
"application/vnd.jupyter.widget-view+json": {
|
|
|
276 |
"version_major": 2,
|
277 |
-
"version_minor": 0
|
278 |
-
|
279 |
-
|
|
|
|
|
280 |
},
|
281 |
-
"metadata": {}
|
|
|
282 |
}
|
283 |
],
|
284 |
"source": [
|
@@ -288,9 +295,8 @@
|
|
288 |
},
|
289 |
{
|
290 |
"cell_type": "code",
|
291 |
-
"
|
292 |
-
|
293 |
-
],
|
294 |
"metadata": {
|
295 |
"colab": {
|
296 |
"base_uri": "https://localhost:8080/"
|
@@ -298,92 +304,71 @@
|
|
298 |
"id": "fCVArUtw1rV5",
|
299 |
"outputId": "852fc261-c684-4f65-d17f-011f2b1860d0"
|
300 |
},
|
301 |
-
"id": "fCVArUtw1rV5",
|
302 |
-
"execution_count": 23,
|
303 |
"outputs": [
|
304 |
{
|
305 |
-
"output_type": "execute_result",
|
306 |
"data": {
|
307 |
"text/plain": [
|
308 |
"134"
|
309 |
]
|
310 |
},
|
|
|
311 |
"metadata": {},
|
312 |
-
"
|
313 |
}
|
|
|
|
|
|
|
314 |
]
|
315 |
},
|
316 |
{
|
317 |
"cell_type": "markdown",
|
318 |
-
"
|
319 |
-
"## Generate questions and save them"
|
320 |
-
],
|
321 |
"metadata": {
|
322 |
"id": "oPOm5UsJxG37"
|
323 |
},
|
324 |
-
"
|
|
|
|
|
325 |
},
|
326 |
{
|
327 |
"cell_type": "code",
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
"source": [
|
329 |
"from haystack.nodes import QuestionGenerator\n",
|
330 |
"from haystack.pipelines import QuestionGenerationPipeline\n",
|
331 |
"from haystack.utils import launch_es, print_questions\n"
|
332 |
-
]
|
333 |
-
"metadata": {
|
334 |
-
"id": "dYf2rgwzxa1z"
|
335 |
-
},
|
336 |
-
"id": "dYf2rgwzxa1z",
|
337 |
-
"execution_count": 16,
|
338 |
-
"outputs": []
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
-
"
|
343 |
-
|
344 |
-
],
|
345 |
"metadata": {
|
346 |
"id": "EnQk6d0FzlnT"
|
347 |
},
|
348 |
-
"
|
349 |
-
"
|
350 |
-
|
|
|
351 |
},
|
352 |
{
|
353 |
"cell_type": "code",
|
354 |
-
"
|
355 |
-
|
356 |
-
"question_generator = QuestionGenerator()\n",
|
357 |
-
"\n",
|
358 |
-
"question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
|
359 |
-
"for idx, document in enumerate(document_store):\n",
|
360 |
-
" if idx%5==0:\n",
|
361 |
-
" print(idx/len(docs)*100)\n",
|
362 |
-
" results = question_generation_pipeline.run(documents=[document])\n",
|
363 |
-
"\n",
|
364 |
-
" # save to file\n",
|
365 |
-
" questions_for_doc=f'{idx}: {document.content[:100]}...\\n'+'-'*15+'\\n'\n",
|
366 |
-
" if \"generated_questions\" in results.keys():\n",
|
367 |
-
" for result in results[\"generated_questions\"]:\n",
|
368 |
-
" for question in result[\"questions\"]:\n",
|
369 |
-
" questions_for_doc+=(f\" - {question}\\n\")\n",
|
370 |
-
" with open(OUTPUT_QUESTIONS_FILE,'a+') as fo:\n",
|
371 |
-
" fo.write(questions_for_doc)"
|
372 |
-
],
|
373 |
"metadata": {
|
374 |
-
"id": "8fYMVd_ggJnw",
|
375 |
"colab": {
|
376 |
"base_uri": "https://localhost:8080/"
|
377 |
},
|
|
|
378 |
"outputId": "880c4204-854c-49fc-f82b-ec008076b662"
|
379 |
},
|
380 |
-
"id": "8fYMVd_ggJnw",
|
381 |
-
"execution_count": 24,
|
382 |
"outputs": [
|
383 |
{
|
384 |
-
"metadata": {
|
385 |
-
"tags": null
|
386 |
-
},
|
387 |
"name": "stderr",
|
388 |
"output_type": "stream",
|
389 |
"text": [
|
@@ -392,8 +377,8 @@
|
|
392 |
]
|
393 |
},
|
394 |
{
|
395 |
-
"output_type": "stream",
|
396 |
"name": "stdout",
|
|
|
397 |
"text": [
|
398 |
"0.0\n",
|
399 |
"3.731343283582089\n",
|
@@ -422,14 +407,31 @@
|
|
422 |
"89.55223880597015\n"
|
423 |
]
|
424 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
]
|
426 |
},
|
427 |
{
|
428 |
"cell_type": "code",
|
429 |
-
"
|
430 |
-
|
431 |
-
"print_questions(results)"
|
432 |
-
],
|
433 |
"metadata": {
|
434 |
"colab": {
|
435 |
"base_uri": "https://localhost:8080/"
|
@@ -437,12 +439,10 @@
|
|
437 |
"id": "1i2C6PhSKHZY",
|
438 |
"outputId": "ba202bf9-b445-43ca-c267-7fd873a80e81"
|
439 |
},
|
440 |
-
"id": "1i2C6PhSKHZY",
|
441 |
-
"execution_count": 30,
|
442 |
"outputs": [
|
443 |
{
|
444 |
-
"output_type": "stream",
|
445 |
"name": "stdout",
|
|
|
446 |
"text": [
|
447 |
"124: Jerry Horne\n",
|
448 |
"Jeremy \"Jerry\" Horne was the playboy brother of Benjamin Horne and the uncle of Audrey a...\n",
|
@@ -670,22 +670,31 @@
|
|
670 |
" - What actor reprised the role in the 2017 revival?\n"
|
671 |
]
|
672 |
}
|
|
|
|
|
|
|
|
|
673 |
]
|
674 |
},
|
675 |
{
|
676 |
"cell_type": "code",
|
677 |
-
"
|
678 |
-
|
679 |
-
],
|
680 |
"metadata": {
|
681 |
"id": "QN9xxnTrKLTX"
|
682 |
},
|
683 |
-
"
|
684 |
-
"
|
685 |
-
"outputs": []
|
686 |
}
|
687 |
],
|
688 |
"metadata": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
689 |
"kernelspec": {
|
690 |
"display_name": "saturn (Python 3)",
|
691 |
"language": "python",
|
@@ -703,62 +712,12 @@
|
|
703 |
"pygments_lexer": "ipython3",
|
704 |
"version": "3.9.5"
|
705 |
},
|
706 |
-
"colab": {
|
707 |
-
"name": "question_generation.ipynb",
|
708 |
-
"provenance": [],
|
709 |
-
"collapsed_sections": [],
|
710 |
-
"toc_visible": true
|
711 |
-
},
|
712 |
-
"accelerator": "GPU",
|
713 |
"widgets": {
|
714 |
"application/vnd.jupyter.widget-state+json": {
|
715 |
-
"81c8d8eb80d64687bdcc31ab1e3f156e": {
|
716 |
-
"model_module": "@jupyter-widgets/controls",
|
717 |
-
"model_name": "HBoxModel",
|
718 |
-
"model_module_version": "1.5.0",
|
719 |
-
"state": {
|
720 |
-
"_dom_classes": [],
|
721 |
-
"_model_module": "@jupyter-widgets/controls",
|
722 |
-
"_model_module_version": "1.5.0",
|
723 |
-
"_model_name": "HBoxModel",
|
724 |
-
"_view_count": null,
|
725 |
-
"_view_module": "@jupyter-widgets/controls",
|
726 |
-
"_view_module_version": "1.5.0",
|
727 |
-
"_view_name": "HBoxView",
|
728 |
-
"box_style": "",
|
729 |
-
"children": [
|
730 |
-
"IPY_MODEL_b18ba72ca8e94c508fde617b04b82273",
|
731 |
-
"IPY_MODEL_0cc8731e80994ab097236f295da512c7",
|
732 |
-
"IPY_MODEL_e1b08799bca14b7aa8aed017b5545923"
|
733 |
-
],
|
734 |
-
"layout": "IPY_MODEL_4ee263ae48834c5dbb138a5dbe2183bd"
|
735 |
-
}
|
736 |
-
},
|
737 |
-
"b18ba72ca8e94c508fde617b04b82273": {
|
738 |
-
"model_module": "@jupyter-widgets/controls",
|
739 |
-
"model_name": "HTMLModel",
|
740 |
-
"model_module_version": "1.5.0",
|
741 |
-
"state": {
|
742 |
-
"_dom_classes": [],
|
743 |
-
"_model_module": "@jupyter-widgets/controls",
|
744 |
-
"_model_module_version": "1.5.0",
|
745 |
-
"_model_name": "HTMLModel",
|
746 |
-
"_view_count": null,
|
747 |
-
"_view_module": "@jupyter-widgets/controls",
|
748 |
-
"_view_module_version": "1.5.0",
|
749 |
-
"_view_name": "HTMLView",
|
750 |
-
"description": "",
|
751 |
-
"description_tooltip": null,
|
752 |
-
"layout": "IPY_MODEL_ae4e66819ab04f68a867768d03bc4a04",
|
753 |
-
"placeholder": "β",
|
754 |
-
"style": "IPY_MODEL_6bba9b7051f64993a477688eb8c6ed92",
|
755 |
-
"value": "Writing Documents: "
|
756 |
-
}
|
757 |
-
},
|
758 |
"0cc8731e80994ab097236f295da512c7": {
|
759 |
"model_module": "@jupyter-widgets/controls",
|
760 |
-
"model_name": "FloatProgressModel",
|
761 |
"model_module_version": "1.5.0",
|
|
|
762 |
"state": {
|
763 |
"_dom_classes": [],
|
764 |
"_model_module": "@jupyter-widgets/controls",
|
@@ -779,31 +738,25 @@
|
|
779 |
"value": 134
|
780 |
}
|
781 |
},
|
782 |
-
"
|
783 |
"model_module": "@jupyter-widgets/controls",
|
784 |
-
"model_name": "HTMLModel",
|
785 |
"model_module_version": "1.5.0",
|
|
|
786 |
"state": {
|
787 |
-
"_dom_classes": [],
|
788 |
"_model_module": "@jupyter-widgets/controls",
|
789 |
"_model_module_version": "1.5.0",
|
790 |
-
"_model_name": "
|
791 |
"_view_count": null,
|
792 |
-
"_view_module": "@jupyter-widgets/
|
793 |
-
"_view_module_version": "1.
|
794 |
-
"_view_name": "
|
795 |
-
"
|
796 |
-
"description_tooltip": null,
|
797 |
-
"layout": "IPY_MODEL_887b5bcc8d1e4c38b40700d324424f33",
|
798 |
-
"placeholder": "β",
|
799 |
-
"style": "IPY_MODEL_3739785a410b4eadb0c36881e41b88ed",
|
800 |
-
"value": " 10000/? [00:00<00:00, 14955.47it/s]"
|
801 |
}
|
802 |
},
|
803 |
"4ee263ae48834c5dbb138a5dbe2183bd": {
|
804 |
"model_module": "@jupyter-widgets/base",
|
805 |
-
"model_name": "LayoutModel",
|
806 |
"model_module_version": "1.2.0",
|
|
|
807 |
"state": {
|
808 |
"_model_module": "@jupyter-widgets/base",
|
809 |
"_model_module_version": "1.2.0",
|
@@ -852,10 +805,47 @@
|
|
852 |
"width": null
|
853 |
}
|
854 |
},
|
855 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
856 |
"model_module": "@jupyter-widgets/base",
|
857 |
-
"model_name": "LayoutModel",
|
858 |
"model_module_version": "1.2.0",
|
|
|
859 |
"state": {
|
860 |
"_model_module": "@jupyter-widgets/base",
|
861 |
"_model_module_version": "1.2.0",
|
@@ -904,25 +894,10 @@
|
|
904 |
"width": null
|
905 |
}
|
906 |
},
|
907 |
-
"6bba9b7051f64993a477688eb8c6ed92": {
|
908 |
-
"model_module": "@jupyter-widgets/controls",
|
909 |
-
"model_name": "DescriptionStyleModel",
|
910 |
-
"model_module_version": "1.5.0",
|
911 |
-
"state": {
|
912 |
-
"_model_module": "@jupyter-widgets/controls",
|
913 |
-
"_model_module_version": "1.5.0",
|
914 |
-
"_model_name": "DescriptionStyleModel",
|
915 |
-
"_view_count": null,
|
916 |
-
"_view_module": "@jupyter-widgets/base",
|
917 |
-
"_view_module_version": "1.2.0",
|
918 |
-
"_view_name": "StyleView",
|
919 |
-
"description_width": ""
|
920 |
-
}
|
921 |
-
},
|
922 |
"96e28522b93b4c77a9d6da982d601465": {
|
923 |
"model_module": "@jupyter-widgets/base",
|
924 |
-
"model_name": "LayoutModel",
|
925 |
"model_module_version": "1.2.0",
|
|
|
926 |
"state": {
|
927 |
"_model_module": "@jupyter-widgets/base",
|
928 |
"_model_module_version": "1.2.0",
|
@@ -973,8 +948,8 @@
|
|
973 |
},
|
974 |
"9eb3c190c28b47618572ecd9e3b80932": {
|
975 |
"model_module": "@jupyter-widgets/controls",
|
976 |
-
"model_name": "ProgressStyleModel",
|
977 |
"model_module_version": "1.5.0",
|
|
|
978 |
"state": {
|
979 |
"_model_module": "@jupyter-widgets/controls",
|
980 |
"_model_module_version": "1.5.0",
|
@@ -987,10 +962,10 @@
|
|
987 |
"description_width": ""
|
988 |
}
|
989 |
},
|
990 |
-
"
|
991 |
"model_module": "@jupyter-widgets/base",
|
992 |
-
"model_name": "LayoutModel",
|
993 |
"model_module_version": "1.2.0",
|
|
|
994 |
"state": {
|
995 |
"_model_module": "@jupyter-widgets/base",
|
996 |
"_model_module_version": "1.2.0",
|
@@ -1039,19 +1014,46 @@
|
|
1039 |
"width": null
|
1040 |
}
|
1041 |
},
|
1042 |
-
"
|
1043 |
"model_module": "@jupyter-widgets/controls",
|
1044 |
-
"model_name": "DescriptionStyleModel",
|
1045 |
"model_module_version": "1.5.0",
|
|
|
1046 |
"state": {
|
|
|
1047 |
"_model_module": "@jupyter-widgets/controls",
|
1048 |
"_model_module_version": "1.5.0",
|
1049 |
-
"_model_name": "
|
1050 |
"_view_count": null,
|
1051 |
-
"_view_module": "@jupyter-widgets/
|
1052 |
-
"_view_module_version": "1.
|
1053 |
-
"_view_name": "
|
1054 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1055 |
}
|
1056 |
}
|
1057 |
}
|
@@ -1059,4 +1061,4 @@
|
|
1059 |
},
|
1060 |
"nbformat": 4,
|
1061 |
"nbformat_minor": 5
|
1062 |
-
}
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "markdown",
|
5 |
+
"id": "iiNbGRn-KitL",
|
|
|
|
|
|
|
6 |
"metadata": {
|
7 |
"id": "iiNbGRn-KitL"
|
8 |
},
|
9 |
+
"source": [
|
10 |
+
"# Question generation\n",
|
11 |
+
"This notebook is inspired by [Question Generation tutorial](https://haystack.deepset.ai/tutorials/question-generation), from Haystack documentation.\n",
|
12 |
+
"\n",
|
13 |
+
"Here we use a collection of articles about Twin Peaks to generate a variety of questions about that awesome TV series!\n",
|
14 |
+
"\n",
|
15 |
+
"The following steps are performed:\n",
|
16 |
+
"* load data\n",
|
17 |
+
"* create document store and write documents\n",
|
18 |
+
"* generate questions and save them"
|
19 |
+
]
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "markdown",
|
23 |
+
"id": "viixGIJcKPSQ",
|
|
|
|
|
24 |
"metadata": {
|
25 |
"id": "viixGIJcKPSQ"
|
26 |
},
|
27 |
+
"source": [
|
28 |
+
"## Preliminary operations"
|
29 |
+
]
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
+
"execution_count": 1,
|
34 |
+
"id": "MevE4jEZ5QBT",
|
|
|
|
|
35 |
"metadata": {
|
36 |
"colab": {
|
37 |
"base_uri": "https://localhost:8080/"
|
|
|
39 |
"id": "MevE4jEZ5QBT",
|
40 |
"outputId": "136106e4-40c9-4443-ee84-784fb922e188"
|
41 |
},
|
|
|
|
|
42 |
"outputs": [
|
43 |
{
|
|
|
44 |
"name": "stdout",
|
45 |
+
"output_type": "stream",
|
46 |
"text": [
|
47 |
"Mounted at /content/drive\n"
|
48 |
]
|
49 |
}
|
50 |
+
],
|
51 |
+
"source": [
|
52 |
+
"from google.colab import drive\n",
|
53 |
+
"drive.mount('/content/drive')"
|
54 |
]
|
55 |
},
|
56 |
{
|
57 |
"cell_type": "code",
|
58 |
+
"execution_count": null,
|
59 |
+
"id": "VYWRJ-Lf55nV",
|
|
|
|
|
60 |
"metadata": {
|
61 |
"id": "VYWRJ-Lf55nV"
|
62 |
},
|
63 |
+
"outputs": [],
|
64 |
+
"source": [
|
65 |
+
"# install dependencies\n",
|
66 |
+
"! pip install farm-haystack[faiss-gpu]==1.4.0"
|
67 |
+
]
|
68 |
},
|
69 |
{
|
70 |
"cell_type": "markdown",
|
71 |
+
"id": "QVDuHAMIK4bg",
|
|
|
|
|
72 |
"metadata": {
|
73 |
"id": "QVDuHAMIK4bg"
|
74 |
},
|
75 |
+
"source": [
|
76 |
+
"## Load data"
|
77 |
+
]
|
78 |
},
|
79 |
{
|
80 |
"cell_type": "code",
|
|
|
131 |
},
|
132 |
{
|
133 |
"cell_type": "code",
|
134 |
+
"execution_count": 8,
|
135 |
+
"id": "GR6qWQAn72WG",
|
|
|
136 |
"metadata": {
|
137 |
"colab": {
|
138 |
"base_uri": "https://localhost:8080/"
|
|
|
140 |
"id": "GR6qWQAn72WG",
|
141 |
"outputId": "1198a602-7f4e-444a-f8f4-05b488663799"
|
142 |
},
|
|
|
|
|
143 |
"outputs": [
|
144 |
{
|
|
|
145 |
"data": {
|
146 |
"text/plain": [
|
147 |
"134"
|
148 |
]
|
149 |
},
|
150 |
+
"execution_count": 8,
|
151 |
"metadata": {},
|
152 |
+
"output_type": "execute_result"
|
153 |
}
|
154 |
+
],
|
155 |
+
"source": [
|
156 |
+
"len(docs)"
|
157 |
]
|
158 |
},
|
159 |
{
|
|
|
161 |
"execution_count": 9,
|
162 |
"id": "aa231b94",
|
163 |
"metadata": {
|
164 |
+
"colab": {
|
165 |
+
"base_uri": "https://localhost:8080/"
|
166 |
+
},
|
167 |
"execution": {
|
168 |
"iopub.execute_input": "2022-01-09T08:40:48.796741Z",
|
169 |
"iopub.status.busy": "2022-01-09T08:40:48.796550Z",
|
|
|
171 |
"shell.execute_reply": "2022-01-09T08:40:48.804705Z",
|
172 |
"shell.execute_reply.started": "2022-01-09T08:40:48.796722Z"
|
173 |
},
|
|
|
|
|
|
|
174 |
"id": "aa231b94",
|
175 |
"outputId": "3d88f0a8-635d-419c-8660-f6c77803e369"
|
176 |
},
|
177 |
"outputs": [
|
178 |
{
|
|
|
179 |
"data": {
|
180 |
"text/plain": [
|
181 |
"{'content': 'Part 5\\nNot to be confused with Episode 5.\\n\"Part 5\" is the fifth episode of the 2017 series of Twin Peaks and the thirty-fifth episode of the franchise as a whole. It aired on June 4, 2017.\\nPlot\\n\"Case files.\"\\n βDale Cooper\\nGene and Jake sit in a car, the former on the phone with Lorraine, reporting on the situation with Dougie Jones. Frustrated, she sends the message \"2\" (leaving 159 characters to type) to her contact \"ARGENT\" which causes a device in Buenos Aires to ring and flash twice with its two red lights.\\nConstance Talbot, Detective Macklay, and Detective Harrison observe the John Doe in the morgue. Talbot confirms the decapitation as the man\\'s cause of death and presents a ring found inside the body. On it is an inscription that reads, \"To Dougie, with love, Janey-E.\"\\nCooper\\'s doppelganger sits in his jail cell and correctly predicts that his food is coming. He takes his food and goes to the mirror, noting that BOB is still with him.\\nAt his place of employment, Mike Nelson calls in Steven Burnett, who has applied for a job. Mike tells him that his resume is inadequate and his forms were filled out incorrectly, then kicks him out.\\nSheriff Frank Truman talks to Harry on the phone and is informed by Lucy Brennan that his wife, Doris, is coming to him. Doris tells him about her frustrations, including a leaky pipe.\\nJaney-E, Sonny Jim and Cooper leave the Jones home and Janey-E tells Cooper that he had won $425,000. He looks at Sonny Jim and begins to shed tears. On realization that Dougie\\'s car is not there, Janey-E begrudgingly takes Cooper to Dougie\\'s work.\\nGene and Jake check on Dougie\\'s car again, which still has not moved. A group of delinquent youths also drive by the car.\\nJaney-E drops Cooper off at work and he wanders, following the aim of a statue of a man carrying a revolver. He stands around until Dougie\\'s co-worker, Phil Bisby comes carrying coffee on his way to a board meeting. 
Following him into the elevator, Cooper takes one of the coffees and begins drinking the \"damn good Joe.\" It was Frank\\'s one who then takes a green tea latte instead while Darren is turned down by Rhonda and Bonnici next to Frank is served the eighth cup carried by Phil.\\nAnthony Sinclair tells Cooper that he has covered for Dougie\\'s absence and during the meeting, as Sinclair presents a report. When saying that there was no arson with Littlefield, Cooper blurts out \"He\\'s lying,\" but does not elaborate, causing the boss Bushnell Mullins to have \"Dougie\" meet with him after the meeting. Mullins questions his accusation and gives him case files to assess by the next day.\\nRodney and Bradley Mitchum come to the Silver Mustang Casino and in front of Candie, Mandie and Sandie punish Burns for Cooper\\'s win at the casino and replace Burns with Warrick, who they tell to inform them if Cooper ever returns to the casino.\\nWhile his mother is passed out on drugs, the little boy living in the home across from Dougie\\'s car goes to examine it. He is shooed away by the gang of youths, arriving in a loud black 1970 Dodge Charger, who try to steal the car. The bomb under Dougie\\'s car explodes, killing several members of the gang, and the boy runs back to his home. Hearing him coming back in, his mother slowly wakes up and stares at the door.\\nAn auto detailer informs Jade that he found a set of keys for the Great Northern Hotel in her car. Since they have an address on them, she puts them inside a mailbox for delivery.\\nNorma sorts through documents as Heidi is serving and Becky delivers bread to Toad and gets money from Shelly. Norma goes to Shelly, urging her to help Becky rather than continue to enable her. 
Becky takes the money to Steven and they snort a drug.\\nCooper is pushed out of the elevator at the end of the workday and he goes to the statue he saw that morning.\\nAt the Twin Peaks sheriff\\'s station, Hawk and Andy continue to sort through files.\\nJacoby starts up his webcast he hosts as \"Dr. Amp\" and it is viewed by Jerry Horneβwho smokes a jointβand Nadine Hurley. His broadcast ends with an advertisement for his golden shovels that he urges his viewers to buy to shovel themselves \"out of the shit and into the truth.\"\\nAt the Pentagon, Lieutenant Cynthia Knox informs Colonel Davis that they have received a match on Major Garland Briggs\\' fingerprints β the sixteenth match in 25 years β in Buckhorn, South Dakota. Davis doubts the legitimacy of the match but says that if it is indeed truly Briggs that has been identified, that the FBI must be informed.\\nThe band Trouble plays at the Roadhouse as Richard Horne smokes underneath a \\'no smoking\\' sign. Employee Federico asks him to quit and the off-duty Deputy Chad Broxford takes over but ends up taking a bribe from Horne. Charlotte, from the next table over with Elizabeth, asks him for a light, but he grabs her and threatens to rape her.\\nAgent Preston examines Cooper\\'s file and compares his fingerprints from before his 1989 disappearance and from the doppelganger\\'s booking at the federal prison.\\nWarden Murphy gives the doppelganger his phone call. 
However, the doppelganger dials a number that sets off the prison\\'s alarms and he says \"The cow jumped over the moon,\" before hanging up, stopping the alarms.\\nIn Buenos Aires, the device contacted before by Lorraine rings and flashes twice with its two red lights and then shrinks to a kind of seed.\\nCooper continues to observe the statue.\\nCredits\\n\\nStarring\\nKyle MacLachlan as Dale Cooper / Dale Cooper (doppelganger)\\nIn Alphabetical Order\\nJane Adams as Constance Talbot\\nMΓ€dchen Amick as Shelly\\nTammie Baird as Lorraine\\nChrysta Bell as FBI Agent Tammy Preston\\nJim Belushi as Bradley Mitchum\\nSean Bolger as Detailer\\nBrent Briscoe as Detective Dave Macklay\\nWes Brown as Darren\\nJuan Carlos Cantu as Officer Reynaldo\\nVincent Castellanos as Federico\\nBailey Chase as Detective Don Harrison\\nCandy Clark as Doris Truman\\nGrace Victoria Cox as Charlotte\\nGiselle Damier as Sandie\\nDavid Dastmalchian as Pit Boss Warrick\\nJosh Fadem as Phil Bisby\\nEamon Farren as Richard Horne\\nRobert Forster as Sheriff Frank Truman\\nPierce Gagnon as Sonny Jim Jones\\nHailey Gates as Drugged-out Mother\\nBrett Gelman as Supervisor Burns\\nHarry Goaz as Deputy Andy Brennan\\nHank Harris as Prison Tech\\nAndrea Hays as Heidi\\nGary Hershberger as Mike Nelson\\nMichael Horse as Deputy Chief Tommy \"Hawk\" Hill\\nErnie Hudson as Colonel Davis\\nCaleb Landry Jones as Steven Burnett\\nDavid Patrick Kelly as Jerry Horne\\nRobert Knepper as Rodney Mitchum\\nAndrea Leal as Mandie\\nSheryl Lee as Laura Palmer\\nJane Levy as Elizabeth\\nPeggy Lipton as Norma Jennings\\nKarl Makinen as Inspector Randy Hollister\\nJames Morrison as Warden Dwight Murphy\\nDon Murray as Bushnell Mullins\\nJohn Pirruccello as Deputy Chad Broxford\\nAdele RenΓ© as Lieutenant Cynthia Cox\\nKimmy Robertson as Lucy Brennan\\nWendy Robie as Nadine Hurley\\nMarv Rosand as Toad\\nElena Satine as Rhonda\\nAmanda Seyfried as Rebecca (Becky) Burnett\\nAmie Shiels as Candie\\nSawyer Shipman as 
Little Boy\\nFrank Silva as Bob\\nTom Sizemore as Anthony Sinclair\\nBob Stephenson as Frank\\nRuss Tamblyn as Dr. Lawrence Jacoby\\nBill Tangradi as Jake\\nGreg Vrotsos as Gene\\nNaomi Watts as Janey-E Jones\\nNafessa Williams as Jade\\nBlake Zingale as Punk Leader\\nTrouble:\\nRiley Lynch\\nSam Smith\\nAlex Zhang Hungtai\\nDean Hurley\\nUncredited\\nTyler Malik as stand-in\\nKenneth Welsh as Windom Earle (archive footage)\\nUnknown performer as Bonnici\\nUnknown performer as Woman in elevator\\nUnknown performer as Man across Mullins\\nUnknown performer as Woman at meeting\\nUnknown performer as Mullins\\' secretary\\nProduction staff\\nSee: Twin Peaks (2017) Β§ Production staff\\nFeatured music\\n\"The Flame\"\\nWritten and performed by Johnny Jewel\\nCourtesy of Italians Do It Better\\n\"Frank 2000\"\\nWritten by Angelo Badalamenti and David Lynch\\nPerformed by Thought Gang\\n\"I Love How You Love Me\"\\nWritten by Barry Mann and Larry Kolber\\nPerformed by The Paris Sisters\\nPublished by Screen Gems-EMI Music Inc. (BMI)\\n\"I Am\"\\nWritten and performed by BluntedBeatz\\n\"Stars And Stripes Forever\"\\nWritten by John Philip Sousa\\nPerformed and arranged by the U.S. Army Band\\n\"Snake Eyes\"\\nWritten by Dean Hurley, Riley Lynch and Alex Zhang Hungtai\\nPerformed by Trouble\\n\"Habit\" and \"Tabloid\"\\nWritten and performed by Uniform\\nCourtesy of Sacred Bones Records\\n\"Windswept\"\\nWritten and performed by Johnny Jewel\\nCourtesy of Italians Do It Better\\nNotes\\nThis episode was dedicated to the memory of Marv Rosand.\\nAmy Shiels is credited as \"Amie\".\\nFrank, Dougie\\'s coworker who discovers he likes green tea lattes, is played by Bob Stephenson, who appeared in Episode 6 as the burger cook at the Double R Diner. 
This was Stephenson\\'s first acting gig.\\nUpon release, Twin Peaks: The Return earned some criticism for earning the \"Empty Cup Award,\" a satirical achievement for television series where actors handle coffee cups that are claimed to be full in the dialogue but are very clearly empty based on how they are handled by the performers. In the case of this episode, however, Kyle MacLachlan was given some praise for being the sole actor to handle his cup as though it were actually full, especially in an episode where a character (Phil Bisby) unrealistically balances two full trays of coffee while running around.\\nThe statue in front of Dougie\\'s workplace was not originally part of location and was brought by the production staff. It might be a statue of Donald Lynch, father of David Lynch, since according to the stand-in Tyler Malkin, Lynch talked to it saying \"Hi, Dad\".\\nThe numbers input by the doppelganger during his phone call are, using the standard DTMF tones pitched up 2 octaves for offscreen ones:\\n16 (pause) 1235789 (computer modem response) 3135378912315 (01189998819991197253 offscreen)\\nThis could be interpreted as two numbers dialing to get an outside line from the internal prison phone system, then a 7 digit local number calling a computer set up beforehand with a local number so it would be a free local call and finally a code that triggers a pre-planned, automated hack of the prison systems.',\n",
|
182 |
" 'meta': {'name': 'Part_5', 'url': 'https://twinpeaks.fandom.com/wiki/Part_5'}}"
|
183 |
]
|
184 |
},
|
185 |
+
"execution_count": 9,
|
186 |
"metadata": {},
|
187 |
+
"output_type": "execute_result"
|
188 |
}
|
189 |
],
|
190 |
"source": [
|
|
|
193 |
},
|
194 |
{
|
195 |
"cell_type": "markdown",
|
196 |
+
"id": "Yu3bAUPoLrPI",
|
|
|
|
|
|
|
197 |
"metadata": {
|
198 |
"id": "Yu3bAUPoLrPI"
|
199 |
},
|
200 |
+
"source": [
|
201 |
+
"## Create document store ([FAISS](https://github.com/facebookresearch/faiss)) and write documents\n",
|
202 |
+
"\n"
|
203 |
+
]
|
204 |
},
|
205 |
{
|
206 |
"cell_type": "code",
|
207 |
"execution_count": 10,
|
208 |
"id": "bfe846df",
|
209 |
"metadata": {
|
210 |
+
"colab": {
|
211 |
+
"base_uri": "https://localhost:8080/"
|
212 |
+
},
|
213 |
"execution": {
|
214 |
"iopub.execute_input": "2022-01-09T08:40:59.678181Z",
|
215 |
"iopub.status.busy": "2022-01-09T08:40:59.678003Z",
|
|
|
218 |
"shell.execute_reply.started": "2022-01-09T08:40:59.678161Z"
|
219 |
},
|
220 |
"id": "bfe846df",
|
|
|
|
|
|
|
221 |
"outputId": "be9c9ef8-bcc4-4c4a-e7a5-9003077a7ea3"
|
222 |
},
|
223 |
"outputs": [
|
224 |
{
|
|
|
225 |
"name": "stderr",
|
226 |
+
"output_type": "stream",
|
227 |
"text": [
|
228 |
"INFO - haystack.modeling.model.optimization - apex not found, won't use it. See https://nvidia.github.io/apex/\n",
|
229 |
"ERROR - root - Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.\n",
|
|
|
245 |
"execution_count": 11,
|
246 |
"id": "191144b4",
|
247 |
"metadata": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
"colab": {
|
249 |
"base_uri": "https://localhost:8080/",
|
250 |
"height": 49,
|
|
|
262 |
"3739785a410b4eadb0c36881e41b88ed"
|
263 |
]
|
264 |
},
|
265 |
+
"execution": {
|
266 |
+
"iopub.execute_input": "2022-01-09T08:41:10.695292Z",
|
267 |
+
"iopub.status.busy": "2022-01-09T08:41:10.695064Z",
|
268 |
+
"iopub.status.idle": "2022-01-09T08:41:22.144864Z",
|
269 |
+
"shell.execute_reply": "2022-01-09T08:41:22.144203Z",
|
270 |
+
"shell.execute_reply.started": "2022-01-09T08:41:10.695271Z"
|
271 |
+
},
|
272 |
"id": "191144b4",
|
273 |
"outputId": "f564e88e-be20-4f20-b4ed-4c663bfd71ac"
|
274 |
},
|
275 |
"outputs": [
|
276 |
{
|
|
|
277 |
"data": {
|
|
|
|
|
|
|
278 |
"application/vnd.jupyter.widget-view+json": {
|
279 |
+
"model_id": "81c8d8eb80d64687bdcc31ab1e3f156e",
|
280 |
"version_major": 2,
|
281 |
+
"version_minor": 0
|
282 |
+
},
|
283 |
+
"text/plain": [
|
284 |
+
"Writing Documents: 0%| | 0/134 [00:00<?, ?it/s]"
|
285 |
+
]
|
286 |
},
|
287 |
+
"metadata": {},
|
288 |
+
"output_type": "display_data"
|
289 |
}
|
290 |
],
|
291 |
"source": [
|
|
|
295 |
},
|
296 |
{
|
297 |
"cell_type": "code",
|
298 |
+
"execution_count": 23,
|
299 |
+
"id": "fCVArUtw1rV5",
|
|
|
300 |
"metadata": {
|
301 |
"colab": {
|
302 |
"base_uri": "https://localhost:8080/"
|
|
|
304 |
"id": "fCVArUtw1rV5",
|
305 |
"outputId": "852fc261-c684-4f65-d17f-011f2b1860d0"
|
306 |
},
|
|
|
|
|
307 |
"outputs": [
|
308 |
{
|
|
|
309 |
"data": {
|
310 |
"text/plain": [
|
311 |
"134"
|
312 |
]
|
313 |
},
|
314 |
+
"execution_count": 23,
|
315 |
"metadata": {},
|
316 |
+
"output_type": "execute_result"
|
317 |
}
|
318 |
+
],
|
319 |
+
"source": [
|
320 |
+
"len(document_store.get_all_documents())"
|
321 |
]
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "markdown",
|
325 |
+
"id": "oPOm5UsJxG37",
|
|
|
|
|
326 |
"metadata": {
|
327 |
"id": "oPOm5UsJxG37"
|
328 |
},
|
329 |
+
"source": [
|
330 |
+
"## Generate questions and save them"
|
331 |
+
]
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
335 |
+
"execution_count": 16,
|
336 |
+
"id": "dYf2rgwzxa1z",
|
337 |
+
"metadata": {
|
338 |
+
"id": "dYf2rgwzxa1z"
|
339 |
+
},
|
340 |
+
"outputs": [],
|
341 |
"source": [
|
342 |
"from haystack.nodes import QuestionGenerator\n",
|
343 |
"from haystack.pipelines import QuestionGenerationPipeline\n",
|
344 |
"from haystack.utils import launch_es, print_questions\n"
|
345 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
},
|
347 |
{
|
348 |
"cell_type": "code",
|
349 |
+
"execution_count": 19,
|
350 |
+
"id": "EnQk6d0FzlnT",
|
|
|
351 |
"metadata": {
|
352 |
"id": "EnQk6d0FzlnT"
|
353 |
},
|
354 |
+
"outputs": [],
|
355 |
+
"source": [
|
356 |
+
"OUTPUT_QUESTIONS_FILE='/content/drive/MyDrive/Colab Notebooks/wklp/questions.txt'"
|
357 |
+
]
|
358 |
},
|
359 |
{
|
360 |
"cell_type": "code",
|
361 |
+
"execution_count": 24,
|
362 |
+
"id": "8fYMVd_ggJnw",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
"metadata": {
|
|
|
364 |
"colab": {
|
365 |
"base_uri": "https://localhost:8080/"
|
366 |
},
|
367 |
+
"id": "8fYMVd_ggJnw",
|
368 |
"outputId": "880c4204-854c-49fc-f82b-ec008076b662"
|
369 |
},
|
|
|
|
|
370 |
"outputs": [
|
371 |
{
|
|
|
|
|
|
|
372 |
"name": "stderr",
|
373 |
"output_type": "stream",
|
374 |
"text": [
|
|
|
377 |
]
|
378 |
},
|
379 |
{
|
|
|
380 |
"name": "stdout",
|
381 |
+
"output_type": "stream",
|
382 |
"text": [
|
383 |
"0.0\n",
|
384 |
"3.731343283582089\n",
|
|
|
407 |
"89.55223880597015\n"
|
408 |
]
|
409 |
}
|
410 |
+
],
|
411 |
+
"source": [
|
412 |
+
"# Initialize Question Generator\n",
|
413 |
+
"question_generator = QuestionGenerator()\n",
|
414 |
+
"\n",
|
415 |
+
"question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
|
416 |
+
"for idx, document in enumerate(document_store):\n",
|
417 |
+
" if idx%5==0:\n",
|
418 |
+
" print(idx/len(docs)*100)\n",
|
419 |
+
" results = question_generation_pipeline.run(documents=[document])\n",
|
420 |
+
"\n",
|
421 |
+
" # save to file\n",
|
422 |
+
" questions_for_doc=f'{idx}: {document.content[:100]}...\\n'+'-'*15+'\\n'\n",
|
423 |
+
" if \"generated_questions\" in results.keys():\n",
|
424 |
+
" for result in results[\"generated_questions\"]:\n",
|
425 |
+
" for question in result[\"questions\"]:\n",
|
426 |
+
" questions_for_doc+=(f\" - {question}\\n\")\n",
|
427 |
+
" with open(OUTPUT_QUESTIONS_FILE,'a+') as fo:\n",
|
428 |
+
" fo.write(questions_for_doc)"
|
429 |
]
|
430 |
},
|
431 |
{
|
432 |
"cell_type": "code",
|
433 |
+
"execution_count": 30,
|
434 |
+
"id": "1i2C6PhSKHZY",
|
|
|
|
|
435 |
"metadata": {
|
436 |
"colab": {
|
437 |
"base_uri": "https://localhost:8080/"
|
|
|
439 |
"id": "1i2C6PhSKHZY",
|
440 |
"outputId": "ba202bf9-b445-43ca-c267-7fd873a80e81"
|
441 |
},
|
|
|
|
|
442 |
"outputs": [
|
443 |
{
|
|
|
444 |
"name": "stdout",
|
445 |
+
"output_type": "stream",
|
446 |
"text": [
|
447 |
"124: Jerry Horne\n",
|
448 |
"Jeremy \"Jerry\" Horne was the playboy brother of Benjamin Horne and the uncle of Audrey a...\n",
|
|
|
670 |
" - What actor reprised the role in the 2017 revival?\n"
|
671 |
]
|
672 |
}
|
673 |
+
],
|
674 |
+
"source": [
|
675 |
+
"print(questions_for_doc)\n",
|
676 |
+
"print_questions(results)"
|
677 |
]
|
678 |
},
|
679 |
{
|
680 |
"cell_type": "code",
|
681 |
+
"execution_count": null,
|
682 |
+
"id": "QN9xxnTrKLTX",
|
|
|
683 |
"metadata": {
|
684 |
"id": "QN9xxnTrKLTX"
|
685 |
},
|
686 |
+
"outputs": [],
|
687 |
+
"source": []
|
|
|
688 |
}
|
689 |
],
|
690 |
"metadata": {
|
691 |
+
"accelerator": "GPU",
|
692 |
+
"colab": {
|
693 |
+
"collapsed_sections": [],
|
694 |
+
"name": "question_generation.ipynb",
|
695 |
+
"provenance": [],
|
696 |
+
"toc_visible": true
|
697 |
+
},
|
698 |
"kernelspec": {
|
699 |
"display_name": "saturn (Python 3)",
|
700 |
"language": "python",
|
|
|
712 |
"pygments_lexer": "ipython3",
|
713 |
"version": "3.9.5"
|
714 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
715 |
"widgets": {
|
716 |
"application/vnd.jupyter.widget-state+json": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
717 |
"0cc8731e80994ab097236f295da512c7": {
|
718 |
"model_module": "@jupyter-widgets/controls",
|
|
|
719 |
"model_module_version": "1.5.0",
|
720 |
+
"model_name": "FloatProgressModel",
|
721 |
"state": {
|
722 |
"_dom_classes": [],
|
723 |
"_model_module": "@jupyter-widgets/controls",
|
|
|
738 |
"value": 134
|
739 |
}
|
740 |
},
|
741 |
+
"3739785a410b4eadb0c36881e41b88ed": {
|
742 |
"model_module": "@jupyter-widgets/controls",
|
|
|
743 |
"model_module_version": "1.5.0",
|
744 |
+
"model_name": "DescriptionStyleModel",
|
745 |
"state": {
|
|
|
746 |
"_model_module": "@jupyter-widgets/controls",
|
747 |
"_model_module_version": "1.5.0",
|
748 |
+
"_model_name": "DescriptionStyleModel",
|
749 |
"_view_count": null,
|
750 |
+
"_view_module": "@jupyter-widgets/base",
|
751 |
+
"_view_module_version": "1.2.0",
|
752 |
+
"_view_name": "StyleView",
|
753 |
+
"description_width": ""
|
|
|
|
|
|
|
|
|
|
|
754 |
}
|
755 |
},
|
756 |
"4ee263ae48834c5dbb138a5dbe2183bd": {
|
757 |
"model_module": "@jupyter-widgets/base",
|
|
|
758 |
"model_module_version": "1.2.0",
|
759 |
+
"model_name": "LayoutModel",
|
760 |
"state": {
|
761 |
"_model_module": "@jupyter-widgets/base",
|
762 |
"_model_module_version": "1.2.0",
|
|
|
805 |
"width": null
|
806 |
}
|
807 |
},
|
808 |
+
"6bba9b7051f64993a477688eb8c6ed92": {
|
809 |
+
"model_module": "@jupyter-widgets/controls",
|
810 |
+
"model_module_version": "1.5.0",
|
811 |
+
"model_name": "DescriptionStyleModel",
|
812 |
+
"state": {
|
813 |
+
"_model_module": "@jupyter-widgets/controls",
|
814 |
+
"_model_module_version": "1.5.0",
|
815 |
+
"_model_name": "DescriptionStyleModel",
|
816 |
+
"_view_count": null,
|
817 |
+
"_view_module": "@jupyter-widgets/base",
|
818 |
+
"_view_module_version": "1.2.0",
|
819 |
+
"_view_name": "StyleView",
|
820 |
+
"description_width": ""
|
821 |
+
}
|
822 |
+
},
|
823 |
+
"81c8d8eb80d64687bdcc31ab1e3f156e": {
|
824 |
+
"model_module": "@jupyter-widgets/controls",
|
825 |
+
"model_module_version": "1.5.0",
|
826 |
+
"model_name": "HBoxModel",
|
827 |
+
"state": {
|
828 |
+
"_dom_classes": [],
|
829 |
+
"_model_module": "@jupyter-widgets/controls",
|
830 |
+
"_model_module_version": "1.5.0",
|
831 |
+
"_model_name": "HBoxModel",
|
832 |
+
"_view_count": null,
|
833 |
+
"_view_module": "@jupyter-widgets/controls",
|
834 |
+
"_view_module_version": "1.5.0",
|
835 |
+
"_view_name": "HBoxView",
|
836 |
+
"box_style": "",
|
837 |
+
"children": [
|
838 |
+
"IPY_MODEL_b18ba72ca8e94c508fde617b04b82273",
|
839 |
+
"IPY_MODEL_0cc8731e80994ab097236f295da512c7",
|
840 |
+
"IPY_MODEL_e1b08799bca14b7aa8aed017b5545923"
|
841 |
+
],
|
842 |
+
"layout": "IPY_MODEL_4ee263ae48834c5dbb138a5dbe2183bd"
|
843 |
+
}
|
844 |
+
},
|
845 |
+
"887b5bcc8d1e4c38b40700d324424f33": {
|
846 |
"model_module": "@jupyter-widgets/base",
|
|
|
847 |
"model_module_version": "1.2.0",
|
848 |
+
"model_name": "LayoutModel",
|
849 |
"state": {
|
850 |
"_model_module": "@jupyter-widgets/base",
|
851 |
"_model_module_version": "1.2.0",
|
|
|
894 |
"width": null
|
895 |
}
|
896 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
897 |
"96e28522b93b4c77a9d6da982d601465": {
|
898 |
"model_module": "@jupyter-widgets/base",
|
|
|
899 |
"model_module_version": "1.2.0",
|
900 |
+
"model_name": "LayoutModel",
|
901 |
"state": {
|
902 |
"_model_module": "@jupyter-widgets/base",
|
903 |
"_model_module_version": "1.2.0",
|
|
|
948 |
},
|
949 |
"9eb3c190c28b47618572ecd9e3b80932": {
|
950 |
"model_module": "@jupyter-widgets/controls",
|
|
|
951 |
"model_module_version": "1.5.0",
|
952 |
+
"model_name": "ProgressStyleModel",
|
953 |
"state": {
|
954 |
"_model_module": "@jupyter-widgets/controls",
|
955 |
"_model_module_version": "1.5.0",
|
|
|
962 |
"description_width": ""
|
963 |
}
|
964 |
},
|
965 |
+
"ae4e66819ab04f68a867768d03bc4a04": {
|
966 |
"model_module": "@jupyter-widgets/base",
|
|
|
967 |
"model_module_version": "1.2.0",
|
968 |
+
"model_name": "LayoutModel",
|
969 |
"state": {
|
970 |
"_model_module": "@jupyter-widgets/base",
|
971 |
"_model_module_version": "1.2.0",
|
|
|
1014 |
"width": null
|
1015 |
}
|
1016 |
},
|
1017 |
+
"b18ba72ca8e94c508fde617b04b82273": {
|
1018 |
"model_module": "@jupyter-widgets/controls",
|
|
|
1019 |
"model_module_version": "1.5.0",
|
1020 |
+
"model_name": "HTMLModel",
|
1021 |
"state": {
|
1022 |
+
"_dom_classes": [],
|
1023 |
"_model_module": "@jupyter-widgets/controls",
|
1024 |
"_model_module_version": "1.5.0",
|
1025 |
+
"_model_name": "HTMLModel",
|
1026 |
"_view_count": null,
|
1027 |
+
"_view_module": "@jupyter-widgets/controls",
|
1028 |
+
"_view_module_version": "1.5.0",
|
1029 |
+
"_view_name": "HTMLView",
|
1030 |
+
"description": "",
|
1031 |
+
"description_tooltip": null,
|
1032 |
+
"layout": "IPY_MODEL_ae4e66819ab04f68a867768d03bc4a04",
|
1033 |
+
"placeholder": "\u200b",
|
1034 |
+
"style": "IPY_MODEL_6bba9b7051f64993a477688eb8c6ed92",
|
1035 |
+
"value": "Writing Documents: "
|
1036 |
+
}
|
1037 |
+
},
|
1038 |
+
"e1b08799bca14b7aa8aed017b5545923": {
|
1039 |
+
"model_module": "@jupyter-widgets/controls",
|
1040 |
+
"model_module_version": "1.5.0",
|
1041 |
+
"model_name": "HTMLModel",
|
1042 |
+
"state": {
|
1043 |
+
"_dom_classes": [],
|
1044 |
+
"_model_module": "@jupyter-widgets/controls",
|
1045 |
+
"_model_module_version": "1.5.0",
|
1046 |
+
"_model_name": "HTMLModel",
|
1047 |
+
"_view_count": null,
|
1048 |
+
"_view_module": "@jupyter-widgets/controls",
|
1049 |
+
"_view_module_version": "1.5.0",
|
1050 |
+
"_view_name": "HTMLView",
|
1051 |
+
"description": "",
|
1052 |
+
"description_tooltip": null,
|
1053 |
+
"layout": "IPY_MODEL_887b5bcc8d1e4c38b40700d324424f33",
|
1054 |
+
"placeholder": "\u200b",
|
1055 |
+
"style": "IPY_MODEL_3739785a410b4eadb0c36881e41b88ed",
|
1056 |
+
"value": " 10000/? [00:00<00:00, 14955.47it/s]"
|
1057 |
}
|
1058 |
}
|
1059 |
}
|
|
|
1061 |
},
|
1062 |
"nbformat": 4,
|
1063 |
"nbformat_minor": 5
|
1064 |
+
}
|