Stefano Fiorucci committed on
Commit
e49f5ad
0 Parent(s):

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +30 -0
  2. .gitignore +152 -0
  3. LICENSE +21 -0
  4. README.md +13 -0
  5. app.py +151 -0
  6. crawler/requirements.txt +2 -0
  7. crawler/tpcrawler/scrapy.cfg +11 -0
  8. crawler/tpcrawler/tpcrawler/__init__.py +0 -0
  9. crawler/tpcrawler/tpcrawler/items.py +12 -0
  10. crawler/tpcrawler/tpcrawler/middlewares.py +103 -0
  11. crawler/tpcrawler/tpcrawler/pipelines.py +13 -0
  12. crawler/tpcrawler/tpcrawler/settings.py +88 -0
  13. crawler/tpcrawler/tpcrawler/spiders/__init__.py +4 -0
  14. crawler/tpcrawler/tpcrawler/spiders/tpcrawler.py +82 -0
  15. data/index/my_faiss_index.faiss +3 -0
  16. data/index/my_faiss_index.json +1 -0
  17. data/input_docs/%2522Diane...%2522_-_The_Twin_Peaks_Tapes_of_Agent_Cooper.json +1 -0
  18. data/input_docs/%27Tis_Pity_She%27s_a_Whore.json +1 -0
  19. data/input_docs/17_Pieces_of_Pie%3A_Shooting_at_the_Mar_T_(aka_RR)_Diner.json +1 -0
  20. data/input_docs/19th_century.json +1 -0
  21. data/input_docs/1st_to_18th_century.json +1 -0
  22. data/input_docs/20th_century.json +1 -0
  23. data/input_docs/21st_century.json +1 -0
  24. data/input_docs/A_Real_Indication.json +1 -0
  25. data/input_docs/A_Slice_of_Lynch.json +1 -0
  26. data/input_docs/A_Very_Lovely_Dream%3A_One_Week_in_Twin_Peaks.json +1 -0
  27. data/input_docs/Aaron_Burr.json +1 -0
  28. data/input_docs/Abandoned_cabin.json +1 -0
  29. data/input_docs/Abandoned_gas_station.json +1 -0
  30. data/input_docs/Abbie.json +1 -0
  31. data/input_docs/Accountant.json +1 -0
  32. data/input_docs/Ace%27s_Bar-B-Que.json +1 -0
  33. data/input_docs/Agnes_Turnquist.json +1 -0
  34. data/input_docs/Al.json +1 -0
  35. data/input_docs/Al_Cooper.json +1 -0
  36. data/input_docs/Alan_Traherne.json +1 -0
  37. data/input_docs/Albert_Furrer.json +1 -0
  38. data/input_docs/Albert_Rosenfield.json +1 -0
  39. data/input_docs/Aleister_Crowley.json +1 -0
  40. data/input_docs/Alexander_Hamilton.json +1 -0
  41. data/input_docs/Alexander_MacKenzie.json +1 -0
  42. data/input_docs/Alice_Brady.json +1 -0
  43. data/input_docs/Alice_Tremond.json +1 -0
  44. data/input_docs/Allen_K._Boyle.json +1 -0
  45. data/input_docs/American_girl%27s_mother.json +1 -0
  46. data/input_docs/American_girl.json +1 -0
  47. data/input_docs/An_Introduction_to_David_Lynch.json +1 -0
  48. data/input_docs/Andrew_Packard.json +1 -0
  49. data/input_docs/Andrews.json +1 -0
  50. data/input_docs/Andy_(Bryn_Mawr).json +1 -0
.gitattributes ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.wasm filter=lfs diff=lfs merge=lfs -text
25
+ *.xz filter=lfs diff=lfs merge=lfs -text
26
+ *.zip filter=lfs diff=lfs merge=lfs -text
27
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
28
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
29
+ *.db filter=lfs diff=lfs merge=lfs -text
30
+ *.faiss filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+ .env
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ #.idea/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Stefano Fiorucci
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Who Killed Laura Palmer
3
+ emoji: 🌖
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: streamlit
7
+ sdk_version: 1.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import streamlit as st
4
+ import subprocess
5
+ import sys
6
+ import logging
7
+ import pandas as pd
8
+ from json import JSONDecodeError
9
+ from pathlib import Path
10
+ from markdown import markdown
11
+ import random
12
+ from typing import List, Dict, Any, Tuple
13
+
14
+ from haystack.document_stores import ElasticsearchDocumentStore, FAISSDocumentStore
15
+ from haystack.nodes import EmbeddingRetriever
16
+ from haystack.pipelines import ExtractiveQAPipeline
17
+ from haystack.preprocessor.preprocessor import PreProcessor
18
+ from haystack.nodes import FARMReader, TransformersReader
19
+ from haystack.pipelines import ExtractiveQAPipeline
20
+ from annotated_text import annotation
21
+ import shutil
22
+
23
+ # FAISS index directory
24
+ INDEX_DIR = './index'
25
+
26
+
27
# Cached so that the index and the models are loaded only once, at startup
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None}, allow_output_mutation=True)
def start_haystack():
    """
    Build the extractive question answering pipeline.

    Copies the document store database from the index directory into the
    working directory, opens the FAISS document store, prints its size and
    wires an embedding retriever plus a FARM reader into the pipeline that
    is returned.
    """
    # NOTE(review): presumably the FAISS store looks for its database file
    # in the working directory -- confirm against the haystack docs.
    shutil.copy(f'{INDEX_DIR}/faiss_document_store.db','.')

    index_path = f'{INDEX_DIR}/my_faiss_index.faiss'
    config_path = f'{INDEX_DIR}/my_faiss_index.json'
    store = FAISSDocumentStore(faiss_index_path=index_path,
                               faiss_config_path=config_path)
    print (f'Index size: {store.get_document_count()}')

    embedding_retriever = EmbeddingRetriever(
        document_store=store,
        embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
        model_format="sentence_transformers",
    )
    qa_reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
    return ExtractiveQAPipeline(qa_reader, embedding_retriever)
46
+
47
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the Streamlit session state unless the key is already there."""
    if key in st.session_state:
        return
    st.session_state[key] = value
50
+
51
def get_backlink(result, ip) -> str:
    """
    Build the URL of the HTML page for a result from its metadata and the
    Google VM IP (quick and dirty).

    Args:
        result: mapping whose ``result['meta']['filepath']`` holds the
            '/'-separated path of the source file.
        ip: host interpolated verbatim into the URL.

    Returns:
        ``http://{ip}:8000/data/final/ner_html/{name}.html`` where ``name``
        is the file name without its last extension.
    """
    basename = result['meta']['filepath'].rpartition('/')[-1]
    # rpartition('.') yields an empty head when the name has no dot (or only
    # a leading one); fall back to the whole name instead of ".../.html".
    stem = basename.rpartition('.')[0] or basename
    return f'http://{ip}:8000/data/final/ner_html/{stem}.html'
60
+
61
+
62
def query(pipe, question):
    """Run the question through the pipeline and return ``(results, None)``."""
    top_k_settings = {"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
    results = pipe.run(question, params=top_k_settings)
    return results, None
65
+
66
def main():
    """
    Render the Streamlit question answering page.

    Loads the (cached) pipeline, draws the search bar and the Run button,
    runs the query when the button is pressed or the question text changed,
    and lists the answers highlighted inside their context.
    """
    pipe=start_haystack()
    # NOTE(review): my_ip is only needed by the currently disabled backlink
    # rendering below, yet the curl call runs on every execution of main().
    my_ip=subprocess.run(['curl', 'ifconfig.me'], stdout=subprocess.PIPE).stdout.decode('utf-8')

    # Persistent state
    set_state_if_absent('question', "")
    set_state_if_absent('answer', '')
    set_state_if_absent('results', None)
    set_state_if_absent('raw_json', None)
    set_state_if_absent('random_question_requested', False)

    # Small callback to reset the interface in case the text of the question changes
    def reset_results(*args):
        st.session_state.answer = None
        st.session_state.results = None
        st.session_state.raw_json = None

    # Title
    st.write("# Question answering engine")

    st.markdown("""<br/>
Ask any question and see if the system can find the correct answer to your query!

*Note: do not use keywords, but full-fledged questions.*
""", unsafe_allow_html=True)

    # Search bar
    question = st.text_input("",
        value=st.session_state.question,
        max_chars=100,
        #on_change=reset_results
    )
    col1, col2 = st.columns(2)
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
    col2.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")

    run_query = (run_pressed or question != st.session_state.question) and not st.session_state.random_question_requested

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question

        with st.spinner(
            "🧠 &nbsp;&nbsp; Performing neural search on documents..."
        ):
            try:
                st.session_state.results, st.session_state.raw_json = query(pipe, question)
            except JSONDecodeError:
                st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
                else:
                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                return

    if st.session_state.results:
        st.write("## Results:")

        # The low-relevance warning is printed once, before the first weak answer
        alert_irrelevance=True

        for result in st.session_state.results['answers']:
            result=result.to_dict()
            if not result["answer"]:
                continue

            if alert_irrelevance and result['score']<=0.40:
                alert_irrelevance = False
                st.write("<h3 style='color: red'>Attention, the following answers have low relevance:</h3>", unsafe_allow_html=True)

            answer, context = result["answer"], result["context"]
            #authors, title = result["meta"]["authors"], result["meta"]["title"]
            start_idx = context.find(answer)
            if start_idx < 0:
                # str.find returns -1 when the answer is not a verbatim
                # substring of the context; slicing with -1 would drop the
                # last character and duplicate text, so show the whole
                # context followed by the highlighted answer instead.
                before, after = context, ""
            else:
                before, after = context[:start_idx], context[start_idx + len(answer):]
            #url = get_backlink(result, my_ip)
            # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
            st.write(markdown("- ..." + before + str(annotation(answer, "ANSWER", "#8ef")) + after + "..."), unsafe_allow_html=True)
            #st.write(markdown(f"<a href='{url}'>{title} - <i>{authors}</i></a>"), unsafe_allow_html=True)
            #st.write(markdown(f"**Relevance:** {result['score']:.2f}"), unsafe_allow_html=True)
150
+
151
+ main()
crawler/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
1
+ fandom-py==0.2.1
2
+ Scrapy==2.5.1
crawler/tpcrawler/scrapy.cfg ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Automatically created by: scrapy startproject
2
+ #
3
+ # For more information about the [deploy] section see:
4
+ # https://scrapyd.readthedocs.io/en/latest/deploy.html
5
+
6
+ [settings]
7
+ default = tpcrawler.settings
8
+
9
+ [deploy]
10
+ #url = http://localhost:6800/
11
+ project = tpcrawler
crawler/tpcrawler/tpcrawler/__init__.py ADDED
File without changes
crawler/tpcrawler/tpcrawler/items.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define here the models for your scraped items
2
+ #
3
+ # See documentation in:
4
+ # https://docs.scrapy.org/en/latest/topics/items.html
5
+
6
+ import scrapy
7
+
8
+
9
class TpcrawlerItem(scrapy.Item):
    """Placeholder item of the tpcrawler project; it declares no fields."""
    # Fields for the item would be declared here, for example:
    # name = scrapy.Field()
crawler/tpcrawler/tpcrawler/middlewares.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define here the models for your spider middleware
2
+ #
3
+ # See documentation in:
4
+ # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
5
+
6
+ from scrapy import signals
7
+
8
+ # useful for handling different item types with a single interface
9
+ from itemadapter import is_item, ItemAdapter
10
+
11
+
12
class TpcrawlerSpiderMiddleware:
    """Pass-through spider middleware.

    Not every method has to be defined: when one is missing, Scrapy acts
    as if the middleware does not modify the objects passed through it.
    """

    @classmethod
    def from_crawler(cls, crawler):
        # Used by Scrapy to build the instance; also subscribes
        # spider_opened to the spider_opened signal.
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider middleware
        # and into the spider. Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the spider after it has
        # processed the response. Must return an iterable of Request or
        # item objects; everything is forwarded untouched.
        yield from result

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or a process_spider_input() method (from
        # another spider middleware) raises an exception. Should return
        # either None or an iterable of Request or item objects.
        return None

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider; works like
        # process_spider_output() but has no response associated.
        # Must return only requests (not items).
        yield from start_requests

    def spider_opened(self, spider):
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
57
+
58
+
59
class TpcrawlerDownloaderMiddleware:
    """Pass-through downloader middleware.

    Not every method has to be defined: when one is missing, Scrapy acts
    as if the middleware does not modify the objects passed through it.
    """

    @classmethod
    def from_crawler(cls, crawler):
        # Used by Scrapy to build the instance; also subscribes
        # spider_opened to the spider_opened signal.
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware. Must either:
        # - return None: continue processing this request
        # - return a Response object
        # - return a Request object
        # - raise IgnoreRequest: process_exception() methods of the
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.
        # Must either return a Response object, return a Request object,
        # or raise IgnoreRequest. The response is handed on unchanged.
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request() (from
        # another downloader middleware) raises an exception. Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops the process_exception() chain
        # - return a Request object: stops the process_exception() chain
        return None

    def spider_opened(self, spider):
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
crawler/tpcrawler/tpcrawler/pipelines.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define your item pipelines here
2
+ #
3
+ # Don't forget to add your pipeline to the ITEM_PIPELINES setting
4
+ # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
5
+
6
+
7
+ # useful for handling different item types with a single interface
8
+ from itemadapter import ItemAdapter
9
+
10
+
11
class TpcrawlerPipeline:
    """Item pipeline that hands every item on unchanged."""

    def process_item(self, item, spider):
        # No processing is applied: the very same object is returned.
        unchanged = item
        return unchanged
crawler/tpcrawler/tpcrawler/settings.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Scrapy settings for tpcrawler project
2
+ #
3
+ # For simplicity, this file contains only settings considered important or
4
+ # commonly used. You can find more settings consulting the documentation:
5
+ #
6
+ # https://docs.scrapy.org/en/latest/topics/settings.html
7
+ # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
8
+ # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
9
+
10
+ BOT_NAME = 'tpcrawler'
11
+
12
+ SPIDER_MODULES = ['tpcrawler.spiders']
13
+ NEWSPIDER_MODULE = 'tpcrawler.spiders'
14
+
15
+
16
+ # Crawl responsibly by identifying yourself (and your website) on the user-agent
17
+ #USER_AGENT = 'tpcrawler (+http://www.yourdomain.com)'
18
+
19
+ # Obey robots.txt rules
20
+ ROBOTSTXT_OBEY = True
21
+
22
+ # Configure maximum concurrent requests performed by Scrapy (default: 16)
23
+ #CONCURRENT_REQUESTS = 32
24
+
25
+ # Configure a delay for requests for the same website (default: 0)
26
+ # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
27
+ # See also autothrottle settings and docs
28
+ #DOWNLOAD_DELAY = 3
29
+ # The download delay setting will honor only one of:
30
+ #CONCURRENT_REQUESTS_PER_DOMAIN = 16
31
+ #CONCURRENT_REQUESTS_PER_IP = 16
32
+
33
+ # Disable cookies (enabled by default)
34
+ #COOKIES_ENABLED = False
35
+
36
+ # Disable Telnet Console (enabled by default)
37
+ #TELNETCONSOLE_ENABLED = False
38
+
39
+ # Override the default request headers:
40
+ #DEFAULT_REQUEST_HEADERS = {
41
+ # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
42
+ # 'Accept-Language': 'en',
43
+ #}
44
+
45
+ # Enable or disable spider middlewares
46
+ # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
47
+ #SPIDER_MIDDLEWARES = {
48
+ # 'tpcrawler.middlewares.TpcrawlerSpiderMiddleware': 543,
49
+ #}
50
+
51
+ # Enable or disable downloader middlewares
52
+ # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
53
+ #DOWNLOADER_MIDDLEWARES = {
54
+ # 'tpcrawler.middlewares.TpcrawlerDownloaderMiddleware': 543,
55
+ #}
56
+
57
+ # Enable or disable extensions
58
+ # See https://docs.scrapy.org/en/latest/topics/extensions.html
59
+ #EXTENSIONS = {
60
+ # 'scrapy.extensions.telnet.TelnetConsole': None,
61
+ #}
62
+
63
+ # Configure item pipelines
64
+ # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
65
+ #ITEM_PIPELINES = {
66
+ # 'tpcrawler.pipelines.TpcrawlerPipeline': 300,
67
+ #}
68
+
69
+ # Enable and configure the AutoThrottle extension (disabled by default)
70
+ # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
71
+ #AUTOTHROTTLE_ENABLED = True
72
+ # The initial download delay
73
+ #AUTOTHROTTLE_START_DELAY = 5
74
+ # The maximum download delay to be set in case of high latencies
75
+ #AUTOTHROTTLE_MAX_DELAY = 60
76
+ # The average number of requests Scrapy should be sending in parallel to
77
+ # each remote server
78
+ #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
79
+ # Enable showing throttling stats for every response received:
80
+ #AUTOTHROTTLE_DEBUG = False
81
+
82
+ # Enable and configure HTTP caching (disabled by default)
83
+ # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
84
+ #HTTPCACHE_ENABLED = True
85
+ #HTTPCACHE_EXPIRATION_SECS = 0
86
+ #HTTPCACHE_DIR = 'httpcache'
87
+ #HTTPCACHE_IGNORE_HTTP_CODES = []
88
+ #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
crawler/tpcrawler/tpcrawler/spiders/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ # This package will contain the spiders of your Scrapy project
2
+ #
3
+ # Please refer to the documentation for information on how to create and manage
4
+ # your spiders.
crawler/tpcrawler/tpcrawler/spiders/tpcrawler.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scrapy
2
+ from scrapy.utils.response import open_in_browser
3
+ from scrapy.http import TextResponse
4
+ import re
5
+ import fandom
6
+ import json
7
+
8
+ fandom.set_wiki("Twinpeaks")
9
+
10
# Regex capturing the numeric id that follows `wgArticleId":` in the page
# source. Written as a raw string: the previous "\:" was an invalid string
# escape that only worked because Python keeps unknown escapes verbatim.
article_id_pattern = r'wgArticleId":([0-9]+)'
# XPath selecting the category names shown in the page header.
categories_xpath = "//div[@class='page-header__categories']/a//text()"
# Pages that belong to any of these categories are not saved by the spider.
# Spelled out as a set literal so the members do not depend on the
# whitespace of a multi-line string.
excluded_categories = {
    "Twin Peaks (2017) crew",
    "Actors",
    "Camera and electrical department",
    "Casting department",
    "Catering department",
    "Costume department",
    "Directors",
    "Editors",
    "Location department",
    "Makeup department",
    "Medics",
    "Music department",
    "Producers",
    "Production associates",
    "Special and visual effects department",
    "Stand-ins",
    "Story editors",
    "Stunt department",
    "Transportation department",
    "Writers",
    "Years",
    "Decades",
    "Days",
    "Production timeline",
}
36
+
37
+ #print(excluded_categories)
38
+
39
class Tpcrawler(scrapy.Spider):
    """Crawl the Twin Peaks fandom wiki and dump each article as a JSON file.

    Starting from Special:AllPages, every non-redirect page link is visited.
    Pages that share no category with ``excluded_categories`` are fetched
    through the fandom API and written to ``./data/<name>.json``.
    """
    name = 'tpcrawler'
    # allowed_domains takes bare domain names, not URLs: Scrapy's offsite
    # filtering ignores URL entries (with a warning).
    allowed_domains = ['twinpeaks.fandom.com']
    start_urls = ['https://twinpeaks.fandom.com/wiki/Special:AllPages']

    def parse(self, response):
        """Yield one request per listed article, then one for the next listing page."""
        hrefs = response.xpath("//ul[@class='mw-allpages-chunk']/li/a[not(contains(@class, 'redirect'))]/@href").extract()
        for href in hrefs:
            page_url = response.urljoin(href)
            # The last path segment of the link doubles as the output file name
            yield scrapy.Request(url=page_url, callback=self.parse_page, dont_filter=True,
                                 meta={'name': href.rpartition('/')[-1],
                                       'url': page_url})

        next_page = response.xpath("//div[@class='mw-allpages-nav']/a[contains(.,'Next page')]/@href").extract_first()

        if next_page:
            yield scrapy.Request(url=response.urljoin(next_page), callback=self.parse, dont_filter=True)

    def parse_page(self, response: TextResponse):
        """Save the plain text of an article unless it belongs to an excluded category.

        The article id is read from the page source; the text comes from the
        fandom API and is cut at the first "Appearances" / "References"
        heading before being written as JSON.
        """
        categories = set(response.xpath(categories_xpath).extract())
        if not categories.isdisjoint(excluded_categories):
            return

        name = response.meta['name']
        url = response.meta['url']

        # A page without the id marker used to raise IndexError; skip it instead
        id_match = re.search(article_id_pattern, response.text)
        if id_match is None:
            self.logger.warning('No article id found in %s, page skipped', url)
            return
        article_id = int(id_match.group(1))

        # once the id is found, use the fandom API to retrieve only the text of the entry
        page = fandom.page(pageid = article_id)

        text = page.plain_text.split('\nAppearances\n')[0].split('\nReferences\n')[0]

        json_content={'name': name, 'url':url, 'text':text}

        with open(f'./data/{name}.json','w', encoding='utf-8') as fout:
            json.dump(json_content, fout)
76
+
77
+
78
+
79
+
80
+
81
+
82
+
data/index/my_faiss_index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9e1876f7697ab4175d029ee6d15fb7a479513a9d4b2eec391ceebd2e77592bb
3
+ size 8635437
data/index/my_faiss_index.json ADDED
@@ -0,0 +1 @@
 
1
+ {"similarity": "dot_product", "embedding_dim": 768}
data/input_docs/%2522Diane...%2522_-_The_Twin_Peaks_Tapes_of_Agent_Cooper.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "%22Diane...%22_-_The_Twin_Peaks_Tapes_of_Agent_Cooper", "url": "https://twinpeaks.fandom.com/wiki/%22Diane...%22_-_The_Twin_Peaks_Tapes_of_Agent_Cooper", "text": "\"Diane...\" - The Twin Peaks Tapes of Agent Cooper\n\"Diane...\" - The Twin Peaks Tapes of Agent Cooper, is a two-sided audio cassette released on October 1, 1990 by Simon & Schuster. It compiles many of the recorded diary entries of Dale Cooper to his assistant Diane that had been featured in the first season and the first episode of the second one, along with new specially-recorded entries written by Scott Frost. It has been re-released as an audiobook.\nSome entries from the original broadcasts are not included on the tapes. The tape begins with a monologue taking place before the pilot in which Cooper discusses his impending trip to Twin Peaks and continues with the initial monologue heard in the pilot, then eventually concludes with the day when he was shot.\nFor his work, Kyle MacLachlan was nominated for a Grammy Award for best spoken-word performance.\nText\n\nFebruary 24\nTesting. One, two. Testing. Diane, it\u2019s 8 a.m., Seattle, Washington. As you have no doubt surmised, by the clarity of this tape, I\u2019ve purchased a new Micro-Mac pocket tape recorder \u2013 \"the big little recorder\" \u2013 at Wally\u2019s Rent-to-Own, 1145 North Hilltop \u2013 where, as the sign says, \"a bargain is a bargain, no matter what the cost\" \u2013 for twenty-one dollars and eighty-nine cents, cash. I decided to pass on the rent-to-own option, Diane. Leasing may be the fast track to an appearance of affluence, but equity will keep you warm at night. I have no doubt that this new model will prove to be an extremely useful tool in the investigatory process- where the most fleeting insight can be lost if your hardware isn\u2019t as solid as you think it is.\nI have two stops to make, Diane. 
Wo\u2019s House of Cloth, where I am picking up a new black suit, upping my total to five \u2013 one for each day of the week, presuming I don\u2019t have to work weekends. Frequently not a safe assumption. One hundred, ninety-nine dollars and ninety-nine cents, including alterations. Second stop, the regional Bureau office to pick up some files. Although I have wrapped up the fiber-sample procedure seminar I came here to conduct, it looks like I\u2019ll be heading east on a new case, instead of back to Philadelphia. Will fill you in on the details after I\u2019ve been briefed.\nDiane, 9 a.m., preparing to board Flight 210. Commuter flight, fifteen seater, arriving in Spokane at 10:15 a.m. One meal, breakfast. Eggs, sausage, toast, jam, juice and the usual coffee-scented hot water. What airlines do to coffee shouldn\u2019t happen to a dog, so I am packing a hot thermos from the commissary.\nCase number is: 11219er, you\u2019ll have a copy of the file on your desk by the time you receive this. Victim: seventeen year old white female, dead, bound and wrapped in plastic. Cause of death unknown. Says here she was the Homecoming Queen. Second victim, discovered alive, was found across the state line, which is why it\u2019s our business now. Suspects are in custody. Will assess their value upon arrival.\nDiane, I understand the air is so clear out where I am going, that you can see across two states when it\u2019s not raining, which is most of the time. So I\u2019ve packed a pair of the business man\u2019s friend, totes for the feet.\nI\u2019ve been scanning active files for the region. Note: possible correlation to a murder last year of one Teresa Banks, in the southwest corner of the state. Had all the trappings of a serial killing, except for one: a second body. Maybe this is it. Teresa Banks died a year ago, almost to the day.\nDiane, airborne. At what the pilot unintelligibly assures us is 17,000 feet. Dozed through breakfast and, for obvious reasons, am now awake. 
From the remains on the mans tray across the aisle, my stab at the menu was on the money, except they served bacon instead of sausage. Should be on the ground in thirty minutes, which will be about the time I empty my thermos, although the pilot did mention something about a storm front up ahead. Of course, statistically, when encountering weather aloft, the odds are in your favor in a smaller plane.\nDiane, time, at this moment, seems of little importance. If, by chance, we should not pass through this incredible maelstrom, you will find instructions for the disposition of my worldly goods and my remains, should there be any, filed under \"Things to Come.\"\nDiane, 10:30 a.m. *kissing noises* Back on solid ground. I should say solid asphalt. I\u2019m on my hands and knees in the parking lot of the branch office in Spokane. I\u2019m picking up a Bureau car. No, no, no, I\u2019m all right! Just thankful to be alive.\nLooks like I\u2019ll be heading out Highway 2, due east. I\u2019ve got about two hours of driving ahead of me. That\u2019ll include lunch, one pit stop and two cups of coffee, unless I refill my thermos. And, Diane, for the last fifteen miles, I\u2019m supposed to look out for deer on the highway. Man, wouldn\u2019t that be something?\nDiane, 11:30 a.m., February 24th. Entering the town of Twin Peaks, five miles south of the Canadian border, twelve miles west of the state line. Never seen so many trees in my life. As W.C. Fields would say, \"I'd rather be here than Philadelphia.\" 54 degrees on a slightly overcast day, weatherman said rain. If you could get paid that much for being wrong 60% of the time, you'd be working. Mileage is 79,345, gauge is on reserve. Riding on fumes here. I've got to tank up when I get into town. Remind me to tell you how much that is. Lunch was, uh, six dollars and thirty one cents at the Lamplighter Inn. That's on Highway 2, near Lewis Fork. That was a tuna fish sandwich, slice of cherry pie and a cup of coffee. 
Damn good food. Diane, if you ever get up this way, that cherry pie is worth a stop.\"Pilot\"\nOkay... looks like I'll be meeting up with a, uh, Sheriff Harry S. Truman. Shouldn't be too hard to remember that. He'll be at the Calhoun Memorial Hospital. Guess we're going to go up to Intensive Care and take a look at that girl that crawled down the railroad tracks off the mountain. I'm pretty sure I'll be checking into a motel. I'm sure the sheriff will be able to recommend a clean place, reasonably priced. That's what I need, clean place, reasonably priced. Oh, Diane, I almost forgot. I've got to find out what kind of trees these are. They're really something special.\"Pilot\"\nDiane, 3:10 p.m. Walking out of the elevator on the second floor of Calhoun Memorial Hospital, Twin Peaks. And if I\u2019m not mistaken, the man approaching me, wearing the gun, will be Sheriff Harry. S. Truman."}
data/input_docs/%27Tis_Pity_She%27s_a_Whore.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "%27Tis_Pity_She%27s_a_Whore", "url": "https://twinpeaks.fandom.com/wiki/%27Tis_Pity_She%27s_a_Whore", "text": "'Tis Pity She's a Whore\n'Tis Pity She's a Whore was a play performed by the Twin Peaks Timber Players.\nBehind the scenes\n'Tis Pity She's a Whore is a tragedy by John Ford, first performed between 1629 and 1633."}
data/input_docs/17_Pieces_of_Pie%3A_Shooting_at_the_Mar_T_(aka_RR)_Diner.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "17_Pieces_of_Pie:_Shooting_at_the_Mar_T_(aka_RR)_Diner", "url": "https://twinpeaks.fandom.com/wiki/17_Pieces_of_Pie:_Shooting_at_the_Mar_T_(aka_RR)_Diner", "text": "17 Pieces of Pie: Shooting at the Mar T (aka RR) Diner\n\"17 Pieces of Pie: Shooting at the Mar T (aka RR) Diner\" is a featurette originally included on the 2001 DVD release Twin Peaks: The First Season. It was later re-released in the Blu-ray sets Twin Peaks: The Entire Mystery and Twin Peaks: From Z to A.\nIt features a 2000 interview with Pat Cokewell, who was the owner of the Mar-T Caf\u00e9 when it was used for filming the Double R Diner in Twin Peaks and Twin Peaks: Fire Walk with Me.\nTrivia\nThe title and opening text of the featurette erroneously refer to the caf\u00e9 as \"Mar T Diner.\"\nvteTwin Peaks home video releasesThe First Season\n\"17 Pieces of Pie\"\n\"An Introduction to David Lynch\"\n\"Learning to Speak in the Red Room\"\n\"Mark Frost Telephone Interview\"\n\"Postcards From The Cast\"\nLog Lady introductions\nThe Second Season\n\"Cast Interview\"\n\"Crew Interview\"\nFire Walk with Me (Criterion)\n\"Reflections on the Phenomenon of Twin Peaks\"\nDefinitive Gold Box Edition\n\"A Slice of Lynch\"\n\"Location Guide\"\n\"Return to Twin Peaks\"\n\"Secrets from Another Place: Creating Twin Peaks\"\n\"Saturday Night Live\"\nGeorgia Coffee commercials\nTwin Peaks Sheriff's Hotline\nThe Entire Mystery\nTwin Peaks: The Missing Pieces\n\"Atmospherics\"\n\"Between Two Worlds\"\n\"Moving Through Time: Fire Walk with Me Memories\"\nA Limited Event Series\nImpressions: A Journey Behind the Scenes of Twin Peaks (The Man with the Gray Elevated Hair\nTell It Martin\nTwo Blue Balls\nThe Number of Completion\nBad Binoculars\nSee You on the Other Side Dear Friend\nDo Not Pick Up Hitchhikers\nA Bloody Finger In Your Mouth\nThe Polish Accountant\nA Pot of Boiling Oil)\nTwin Peaks: The Phenomenon\n\"Behind the Red Curtain\"\n\"I Had Bad Milk in Dehradun\"\n\"A Very Lovely Dream: One Week in 
Twin Peaks\"\nThe Television CollectionTBAFrom Z to A\nBehind the Curtain\nOn the Couch\nA Talk with Kyle MacLachlan and Sheryl Lee\nOthers\"Twin Peaks Festival Greeting\""}
data/input_docs/19th_century.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "19th_century", "url": "https://twinpeaks.fandom.com/wiki/19th_century", "text": "19th century\nThe following events occurred in the 19th century (1801 \u2013 1900 AD):\nEvents\nDecember 19, unknown year \u2013 A nine-foot-tall \"giant\" is unearthed from an ancient burial mound in Maple Creek, Wisconsin.\nOver the American West, \"mystery airships\" are witnessed in the skies throughout the 19th century.\nDecades\n1800s\n1810s\n1820s\n1830s\n1840s\n1850s\n1860s\n1870s\n1880s\n1890s"}
data/input_docs/1st_to_18th_century.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "1st_to_18th_century", "url": "https://twinpeaks.fandom.com/wiki/1st_to_18th_century", "text": "1st to 18th century\nThe following events took place in the 1st to 18th century (1 \u2013 1800 AD):\nEvents\n\n12th century\nThe Welsh prince Madoc allegedly settles and founds colonies of \"white Indians\" along the Mississippi River, including the Mandan tribe.\n15th century\nThe Freemasons are first founded in Europe.\n16th century\nRabelais creates the concept of the Church of Thelema in a satirical novel.\n1542 \u2013 Juan Rodriguez Cabrillo is the first western explorer to reach Puget Sound.\nc. 1572 \u2013 Puget Sound is renamed Nova Albion by Francis Drake in honor of Queen Elizabeth I.\n17th century\n1670 \u2013 The king of England grants the Hudson Bay Company its charter.\n18th century\nUnknown year \u2013 A new style of beaver hat is popularized in Europe.\n1741 \u2013 Russian explorers Vitus Behring and Peter Cherikof explore Puget Sound.\nSpanish explorer Gaspar de Portola discovers and names the Arroyo Seco canyon in southern California.\n1776 \u2013 A new style of beaver hat is popularized in Europe.\n1787 \u2013\nGeneral James Wilkinson, commander of the United States Army, becomes a double agent for the Spanish crown.\nThe Moose Massacre of more than fifty animals occurs in Twin Peaks' salt marshes.\nMay \u2013 French trapper and pedophile Gaston Leroux dies next to a lot of moss near Owl Cave after being the first to write that moss always grows to the north.\n1791 \u2013 Titanium is discovered.\n1793 \u2013 Alexander MacKenzie is the first Caucasian to reach the Pacific Northwest.\n1795 \u2013 Ivan Pritikoff concludes the Vladivostok Treaty with the Chinook tribe.\nBirths\n1775 \u2013 Dominick Renault\nDeaths\nMay 1787 \u2013 Gaston Leroux"}
data/input_docs/20th_century.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "20th_century", "url": "https://twinpeaks.fandom.com/wiki/20th_century", "text": "20th century\nThe following events occurred in the 20th century (1901 \u2013 2000 AD):\nEvents\nTo be added\nDecades\n1900s\n1910s\n1920s\n1930s\n1940s\n1950s\n1960s\n1970s\n1980s\n1990s"}
data/input_docs/21st_century.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "21st_century", "url": "https://twinpeaks.fandom.com/wiki/21st_century", "text": "21st century\nThe following events occurred in the 21st century (2001 \u2013 2017 AD):\nEvents\nMarch 27, 2000\u20132016 \u2013 Annie Blackburn says \"I'm fine\" at 8:38 a.m. on this day every year.\nDecades\n2000s\n2010s"}
data/input_docs/A_Real_Indication.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "A_Real_Indication", "url": "https://twinpeaks.fandom.com/wiki/A_Real_Indication", "text": "A Real Indication\n\"A Real Indication\" is a song composed by Angelo Badalamenti, credited to the Thought Gang project. The song appears in Twin Peaks: Fire Walk With Me during the scene where Laura and Bobby talk and say goodbye after school and also during the scene in which Laura finds that there are missing pages in her diary.\nDescription found in \"David Lynch: The Lime Green Set\"\n David Lynch & Badalamenti's 'Thought Gang - A Real Indication \n\"Filmed on Hi-8 in 1992, this DIY music video is a rare visual document of Lynch & Badalamenti's 'Thought Gang.' An esoteric jazz side-project born amidst the landscape of Twin Peaks, 'Thought Gang' featured several talented players in a mostly improvisational setting with Badalamenti providing vocal duties. 'A Real Indication' was the first of around 10 tracks to be recorded under the Thought Gang moniker, and one of only two Thought Gang tracks to have been released over the years (both 'A Real Indication' and 'The Black Dog Runs at Night' are included on the soundtrack to Twin Peaks: Fire Walk With Me). 
The remaining tracks await an LP release in 2011.\"\nLyrics\nSo I'm goin' down this street\nAnd I'm tryin' not to smile\n'Cause the street is where I'm goin'\nAnd the curb is at the side\nBy the sewer\nWhere the rain goes down\nLike this girl I once knew\n'Cause the sewer is so hollow\nAnd the yell\nCould last forever\nLike the night my girl went away\nGone off in a world filled with stuff\nLights start changin'\nAnd there's wires in the air\nAnd the asphalt man\nIs all around me\nAnd I look down\nAnd my shoes are so far away from me, man\nI can't believe it\nI got a real indication\nOf a laugh comin' on\nThat old wind\nIs howling like a cold steel train\nGirl has left me\nNot comin' back again\nGot rusted bullet holes in the Dodge\nAnd a heartburn like a solar flare\nThe grass by the house is dry, man\nAnd a horsefly\nBuzzes\nBy the big mistake In the distance, man\nI see myself start to smile\nI got a real indication of a laugh comin' on"}
data/input_docs/A_Slice_of_Lynch.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "A_Slice_of_Lynch", "url": "https://twinpeaks.fandom.com/wiki/A_Slice_of_Lynch", "text": "A Slice of Lynch\nA Slice of Lynch is a behind-the-scenes featurette originally released with the 2007 DVD Twin Peaks: Definitive Gold Box Edition. It features a conversation between Twin Peaks co-creator David Lynch, actors M\u00e4dchen Amick and Kyle MacLachlan, and post-production supervisor John Wentworth.\nAn uncut version of the featurette was released with the Blu-ray releases Twin Peaks: The Entire Mystery (2014) and Twin Peaks: From Z to A (2019).\nvteTwin Peaks home video releasesThe First Season\n\"17 Pieces of Pie\"\n\"An Introduction to David Lynch\"\n\"Learning to Speak in the Red Room\"\n\"Mark Frost Telephone Interview\"\n\"Postcards From The Cast\"\nLog Lady introductions\nThe Second Season\n\"Cast Interview\"\n\"Crew Interview\"\nFire Walk with Me (Criterion)\n\"Reflections on the Phenomenon of Twin Peaks\"\nDefinitive Gold Box Edition\n\"A Slice of Lynch\"\n\"Location Guide\"\n\"Return to Twin Peaks\"\n\"Secrets from Another Place: Creating Twin Peaks\"\n\"Saturday Night Live\"\nGeorgia Coffee commercials\nTwin Peaks Sheriff's Hotline\nThe Entire Mystery\nTwin Peaks: The Missing Pieces\n\"Atmospherics\"\n\"Between Two Worlds\"\n\"Moving Through Time: Fire Walk with Me Memories\"\nA Limited Event Series\nImpressions: A Journey Behind the Scenes of Twin Peaks (The Man with the Gray Elevated Hair\nTell It Martin\nTwo Blue Balls\nThe Number of Completion\nBad Binoculars\nSee You on the Other Side Dear Friend\nDo Not Pick Up Hitchhikers\nA Bloody Finger In Your Mouth\nThe Polish Accountant\nA Pot of Boiling Oil)\nTwin Peaks: The Phenomenon\n\"Behind the Red Curtain\"\n\"I Had Bad Milk in Dehradun\"\n\"A Very Lovely Dream: One Week in Twin Peaks\"\nThe Television CollectionTBAFrom Z to A\nBehind the Curtain\nOn the Couch\nA Talk with Kyle MacLachlan and Sheryl Lee\nOthers\"Twin Peaks Festival Greeting\""}
data/input_docs/A_Very_Lovely_Dream%3A_One_Week_in_Twin_Peaks.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "A_Very_Lovely_Dream:_One_Week_in_Twin_Peaks", "url": "https://twinpeaks.fandom.com/wiki/A_Very_Lovely_Dream:_One_Week_in_Twin_Peaks", "text": "A Very Lovely Dream: One Week in Twin Peaks\n\"A Very Lovely Dream: One Week in Twin Peaks\" is a featurette directed by Charles de Lauzirika exclusive to the Blu-ray release of Twin Peaks: A Limited Event Series and Twin Peaks: From Z to A.\nThe featurette consists of behind the scenes footage of on-location filming of Twin Peaks (2017) in Washington, accompanied with narration by cast and crew.\nNarrators\nKyle MacLachlan\nSabrina S. Sutherland\nMark Frost\nHarry Goaz\nMichael Horse\nDana Ashbrook\nRuth De Jong\nKimmy Robertson\nDavid Lynch\nvteTwin Peaks home video releasesThe First Season\n\"17 Pieces of Pie\"\n\"An Introduction to David Lynch\"\n\"Learning to Speak in the Red Room\"\n\"Mark Frost Telephone Interview\"\n\"Postcards From The Cast\"\nLog Lady introductions\nThe Second Season\n\"Cast Interview\"\n\"Crew Interview\"\nFire Walk with Me (Criterion)\n\"Reflections on the Phenomenon of Twin Peaks\"\nDefinitive Gold Box Edition\n\"A Slice of Lynch\"\n\"Location Guide\"\n\"Return to Twin Peaks\"\n\"Secrets from Another Place: Creating Twin Peaks\"\n\"Saturday Night Live\"\nGeorgia Coffee commercials\nTwin Peaks Sheriff's Hotline\nThe Entire Mystery\nTwin Peaks: The Missing Pieces\n\"Atmospherics\"\n\"Between Two Worlds\"\n\"Moving Through Time: Fire Walk with Me Memories\"\nA Limited Event Series\nImpressions: A Journey Behind the Scenes of Twin Peaks (The Man with the Gray Elevated Hair\nTell It Martin\nTwo Blue Balls\nThe Number of Completion\nBad Binoculars\nSee You on the Other Side Dear Friend\nDo Not Pick Up Hitchhikers\nA Bloody Finger In Your Mouth\nThe Polish Accountant\nA Pot of Boiling Oil)\nTwin Peaks: The Phenomenon\n\"Behind the Red Curtain\"\n\"I Had Bad Milk in Dehradun\"\n\"A Very Lovely Dream: One Week in Twin Peaks\"\nThe Television CollectionTBAFrom Z to A\nBehind the Curtain\nOn 
the Couch\nA Talk with Kyle MacLachlan and Sheryl Lee\nOthers\"Twin Peaks Festival Greeting\""}
data/input_docs/Aaron_Burr.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Aaron_Burr", "url": "https://twinpeaks.fandom.com/wiki/Aaron_Burr", "text": "Aaron Burr\nAaron Burr was a United States Vice President.\nAfter killing Alexander Hamilton in a duel on the New Jersey Palisades, Burr fled west.\nAlong with several co-conspirators, Burr plotted to seize land in Texas, Mexico, and Louisiana to establish a new republic, which Burr planned to lead. In 1805, General James Wilkinson warned President Thomas Jefferson of the plot.\nBurr was arrested and put on trial in 1807.\nBehind the scenes\nAaron Burr (February 6, 1756 \u2013 September 14, 1836) was the third Vice President of the United States, best remembered for his killing of Alexander Hamilton in an 1804 duel and his arrest for treason in 1807."}
data/input_docs/Abandoned_cabin.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Abandoned_cabin", "url": "https://twinpeaks.fandom.com/wiki/Abandoned_cabin", "text": "Abandoned cabin\nAn abandoned cabin near Twin Peaks, Washington was a temporary refuge for Windom Earle.\nHistory\nIn search of the Black Lodge, Earle arrived at Twin Peaks, Washington and settled in a small abandoned cabin in the nearby woods. Windom welcomed Leo Johnson, whom he found to be a criminal, and began to use him as a pawn, to the point of placing a shock collar around Johnson's neck.\nThe next day, Earle checked on Leo's progress with arrow-making for him.\nIn the woods, Earle encountered a youth named Rusty Tomasky and promised him beer and a party. Back at the cabin, he told Tomasky about the White Lodge, a good place, and its evil opposite, the Black Lodge. The latter he said he intended to find. Rusty became restless, wondering about the beer and party. Earle told the youth \"in good time\" and played a flute.\nAfter Leo and Earle in a horse costume tranquilized Major Garland Briggs, they brought him to the cabin. Earle interrogated Briggs about Owl Cave, but the Major stayed silent, so Earle injected him with a truth serum, and Briggs said he first saw the petroglyph at Owl Cave in a dream. He said that the symbols meant that \"there's a time if Jupiter and Saturn meet, they will receive you.\" Earle deciphered the meaning of this statement and discovered that the petroglyph was a map to the Black Lodge.\nEarle went to the cabin in the morning with a corpse-like skin tone and black teeth and found that Major Briggs had escaped with Leo's help. He did not immediately punish Leo but grinned whilst waving a bag in front of his face. He left Leo at the cabin with a cage of tarantulas hanging above his head, suspended by a string held in Leo's teeth.\nAn unknown assailant later came to the cabin and shot Leo to death."}
data/input_docs/Abandoned_gas_station.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Abandoned_gas_station", "url": "https://twinpeaks.fandom.com/wiki/Abandoned_gas_station", "text": "Abandoned gas station\nAn abandoned gas station was located at 1400 River Road in Twin Peaks, Washington.\nHistory\nOn November 10, 1985, Laura Palmer saw a vision of 1400 River Road, and she rode there on her pony Troy. When Laura arrived, she encountered Margaret Lanterman, who told Laura of several things she knew about her and of owls."}
data/input_docs/Abbie.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Abbie", "url": "https://twinpeaks.fandom.com/wiki/Abbie", "text": "Abbie\nAbbie was a patron at the Roadhouse in Twin Peaks, Washington.\nBiography\nOne night at the Roadhouse, Abbie spotted her acquaintances Mary and Clark canoodling in a booth across the room, where apparently \"lots of people\" also saw them.\nTwo nights later, Abbie met up there with Natalie over beer. She asked where Angela was, and Natalie replied that she was probably out with Clark somewhere. Surprised, Abbie told her about seeing Clark and Mary together. They agreed that Angela wouldn't take the betrayal well if she found out, as she had just lost her mother and stopped taking medication. They were interrupted by Trick, who rushed into their booth and reported he had been run off the road on the way over. After Trick left to buy a round, Abbie asked if he was still under house arrest, but Natalie replied that his sentence was up."}
data/input_docs/Accountant.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Accountant", "url": "https://twinpeaks.fandom.com/wiki/Accountant", "text": "Accountant\nAn accountant was present at the farm in Montana.\nBiography\nAfter Dale Cooper's doppelganger defeated Renzo in an arm wrestle and killed him \u2013 thus becoming leader of the faction housed at the farm \u2013 the accountant asked him if he needed any money before leaving him alone with Ray Monroe."}
data/input_docs/Ace%27s_Bar-B-Que.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Ace%27s_Bar-B-Que", "url": "https://twinpeaks.fandom.com/wiki/Ace%27s_Bar-B-Que", "text": "Ace's Bar-B-Que\n\"His ribs are tender, and a lot of visitors buy the sauce for Christmas presents. It's a good idea to inquire what the specialty of the day is\u2013in case it's barbecued goat. (Not recommended for the squeamish.)\"\nAce's Bar-B-Que was a barbecue restaurant in Twin Peaks, Washington."}
data/input_docs/Agnes_Turnquist.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Agnes_Turnquist", "url": "https://twinpeaks.fandom.com/wiki/Agnes_Turnquist", "text": "Agnes Turnquist\nAgnes Turnquist was a diarist who wrote an entry on July 19, 1893 alluding to the existence of the Ledoux Tannery."}
data/input_docs/Al.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Al", "url": "https://twinpeaks.fandom.com/wiki/Al", "text": "Al\n\"Red door!\"\nAl was a chauffeur in the employ of the Silver Mustang Casino in Las Vegas, Nevada.\nBiography\nAl was assigned to drive home \"Dougie Jones\" (actually Dale Cooper) one night after Cooper won several hundred thousand dollars at the casino's slot machines. Cooper could only say that his house on Lancelot Court had a red door. As they drove down the street, Al struggled to make out the doors on each house, saying that it was hard to see without the sun. Finally, he spotted the correct house and pulled over. Cooper exited and stood, silently, holding his winnings, so Al exited the car and agreed to wait with him as long as necessary. An owl flew overhead, hooting, and Al remarked that he found them \"spooky.\" Finally, Janey-E Jones emerged from the house and slapped Cooper, screaming at him for disappearing without warning. Al apologetically explained that he had been assigned to drive Dougie home, and promptly left.\nThe following week, Al picked up Cooper at the Lucky 7 Insurance office, and recalled him as the \"red door\" man. Instead of driving to Santino's as Cooper had been promised, Al drove him out into the desert, where Rodney and Bradley Mitchum were waiting to kill him. However, they were placated when they discovered Cooper had brought them a cherry pie."}
data/input_docs/Al_Cooper.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Al_Cooper", "url": "https://twinpeaks.fandom.com/wiki/Al_Cooper", "text": "Al Cooper\nAl Cooper was the uncle of Dale Cooper.\nBiography\nAl worked as a magician, including a dinner show act with a dog. He did not have a great relationship with his brother and sold Bibles when he was not doing much business as a magician. Once, he taught his nephew, Dale, how to count cards and took him to a social club to watch him play cards.\nHe later helped his brother's family when his sister-in-law died in 1969."}
data/input_docs/Alan_Traherne.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Alan_Traherne", "url": "https://twinpeaks.fandom.com/wiki/Alan_Traherne", "text": "Alan Traherne\nAlan Traherne was a motion picture sound technician from Twin Peaks, Washington.\nBiography\nTraherne attended Warren G. Harding Elementary School in the same class as Margaret Coulson and Carl Rodd. On September 9, 1947, all three were abducted while on a nature hike near the Pearl Lakes. The incident was witnessed by Douglas Milford, who had been compelled by an unknown force to visit a location in the woods he remembered from his own childhood. There, a large dark shape shone a bright beam of light onto the three children, who then disappeared along with the object.\nTraherne, Rodd, and Coulson were found at the nearby campground the following day by Andrew Packard's scout troop. They were unharmed and appeared to believe that only an hour had passed since they entered the woods. All three underwent a medical exam and were released.\nTraherne graduated from Twin Peaks High School in 1958 and attended community college in Spokane. He moved to Los Angeles and found work as a sound technician in the motion picture industry. Records indicated that he may have suffered post-traumatic stress disorder and attended a survivors' group for UFO abductees in the 1980s.\nTraherne passed away in 1988 due to complications from cancer."}
data/input_docs/Albert_Furrer.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Albert_Furrer", "url": "https://twinpeaks.fandom.com/wiki/Albert_Furrer", "text": "Albert Furrer\n\"A very fuzzy thinker when they called the plays, and especially when it came to girls.\"\nAlbert Furrer was a player on the Twin Peaks High School football team during the 1968 season. His jersey number was 34."}
data/input_docs/Albert_Rosenfield.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Albert_Rosenfield", "url": "https://twinpeaks.fandom.com/wiki/Albert_Rosenfield", "text": "Albert Rosenfield\n\"Albert's path is a strange and difficult one.\"\n \u2015Dale Cooper\nAlbert Rosenfield was a forensics specialist in the Federal Bureau of Investigation who assisted in the investigation of Laura Palmer's murder in 1989. Twenty-five years later, he assisted FBI deputy director Gordon Cole on the Blue Rose case relating to Dale Cooper's reappearance in South Dakota.\nBiography\n\nEducation and early career\nAlbert attended Yale University prior to entering the FBI Academy.\nHe first met Special Agent Dale Cooper in February of 1978.\nReappearance of Agent Jeffries\nIn February 1989, Albert watched as previously missing Special Agent Phillip Jeffries came to the FBI offices in Philadelphia, stating that he was not going to talk about \"Judy\" and questioned who Agent Cooper was. Jeffries then told Albert, Cooper, and Regional Bureau Chief Gordon Cole of a sort of meeting he was at, above a convenience store. This confused Albert, Cooper, and Gordon further. He also went on saying he and other individuals lived inside a dream. Jeffries said he found \"them\" in Seattle at Judy's and followed them. Gordon then told Albert to go get a mineral water and Jeffries disappeared. Albert then called the front desk, who said Phillip was never there.\nMurder of Laura Palmer\nLater, Cooper told Albert that he predicted that Teresa Banks' killer would strike again, and the victim would be a woman, blonde, in high school, sexually active, using drugs, and crying out for help. Albert responded, \"Well, damn, Cooper, that really narrows it down. You're talking about half the high school girls in America!\"\nOn February 24, Cooper's prediction came true. 
Albert was assigned to assist Cooper in the investigation of murdered high school student Laura Palmer, per Cooper's instruction.\nAlbert called the Twin Peaks sheriff's department the following day, discussing with Agent Cooper his plans to perform an autopsy on Laura Palmer.\nHe arrived in Twin Peaks with his team on February 26, telling the sheriff's station receptionist Lucy Moran to inform Cooper of his arrival. He then met Sheriff Harry S. Truman, with whom he immediately had a tense relationship due to Albert's belief that the small town's sheriff's department was primitive and amateur, and was unimpressed by the findings of the autopsy. He was threatened by Harry but brushed it off as he got to work with his team.\nThe following day, Albert got into a fight with Doctor Will Hayward, who wished to stop his further testing on Laura's body so it could be buried for the funeral. When the argument became physical, he was restrained by Benjamin Horne, who took Hayward's side. Albert insisted the funeral should have been delayed as he continued his tests. The disagreement became physical once again before Sheriff Truman and Agent Cooper arrived. Albert let out a string of insults to Sheriff Truman, who punched him causing Albert to land on top of Laura's body. Cooper intervened and sent the Sheriff out, then ordered Albert to let the body be released.\nHe later presented his findings to Cooper and Truman. Laura was a cocaine user, she was bound with two types of twine, and industrial soap particles were found on the back of her neck. He also noted that wounds on her shoulders appeared to be claw marks. 
Lastly, he presented a partially-digested plastic fragment bearing the letter 'J.'\nWhen Truman excused himself to go to Laura's funeral, Albert kept Cooper behind, asking him to sign a form regarding the physical attack by the sheriff earlier in the day, but Cooper refused.\nFive days later, Albert returned to Twin Peaks under Cole's orders after Cooper had been shot. He went to the home of Leo Johnson, where Deputy Andy Brennan ran inside to warn Sheriff Truman, but stepped on a loose board, hitting himself in the head. This revealed a hidden pair of boots and cocaine.\n \nBack at the Sheriff's station, Albert examined Cooper's gunshot wound. Deputy Andy entered and informed Cooper that Leo was imprisoned in Hungry Horse, Montana on February 9, 1988, confirming an alibi for the Teresa Banks murder, therefore eliminating him as a suspect in Laura's murder, if they were indeed killed by the same person.\nHe later walked through the hospital with Truman and Cooper. When they saw Ed Hurley, Truman pours Albert a coffee to leave Cooper alone with Big Ed for a moment. Albert was amused by Ed's story of shooting his wife's eye out on a honeymoon hunting trip. He then went to book a room at the Great Northern Hotel.\nLater, he sat in the conference room, where he and Cooper went over the events that occurred the night Laura died. Andy cried over a picture of Laura, causing Albert to mock him, upsetting him further.\n \nThe next morning, Albert had breakfast with Cooper, who told him about Tibet and that Ronette Pulaski\u2014who had been present at Laura's murder\u2014had woken from her coma and said they would show her sketches of Leo Johnson and \"BOB,\" a man whom Sarah Palmer had a vision of, though Albert was skeptical about BOB even existing. He delivered the autopsy report on suspect Jacques Renault, having found that he was smothered with a pillow. He also suspected the Packard Sawmill fire to be a result of arson, and to have been done by Leo. 
Albert then informed Cooper that his renegade former partner Windom Earle had escaped from a mental institution.\nThe next day, he went to Ronette with Cooper and Truman as the former extracted a small piece of paper with the letter 'B' from underneath her fingernail. Cooper believed it to have been placed by Laura's killer, and Albert noted that Ronette's IV was tainted. Cooper then told Albert and Harry that he was visited by a giant who gave him three clues. Albert scoffed, \"any relation to the dwarf?\"\n \nAlbert later delivered the results on the cocaine found in James Hurley's gas tank as being from the same source as the drugs in Jacques's and Leo's possession. He also said that the letter 'B' under Ronette's finger was cut from a copy of Flesh World. He stated that the sketch of \"BOB\" had been run through several agencies, with no results and also noted that Cooper was shot with a Walther PPK, \"James Bond's gun,\" and that he found fibers left from the shooter. He insulted Truman yet again, causing the sheriff to take him by the collar. Albert explained his methods to be built on the foundation of love and calmly said, \"I love you, Sheriff Truman,\" before leaving.\nAfter the body of Laura Palmer's cousin, Maddy Ferguson was found, Albert presented a letter 'O' found under her ring finger, determining her killer to be the same person that murdered Teresa Banks and Laura. He also said that fur from a taxidermy white fox was in her hand. Albert then encouraged Cooper to find the killer before he struck again.\nHarry confirmed to Albert and Cooper his suspicion that Benjamin Horne was responsible for the deaths of Laura and Maddy. Albert then presented Horne's blood test.\nAlbert went to the Roadhouse, where Cooper called several men, including Ben Horne, Leland Palmer, and Leo Johnson. He believed the killer to be there and used \"magic\" to determine the culprit. 
He asked to have Ben Horne escorted back to the station, with Leland as his attorney.\nThey took Ben down to interrogation, but the sheriff and Cooper pushed Leland into the room, with the attorney being thrown into a rage, revealed as BOB's host.\nThey entered the room, where Leland was restrained and read his rights by Sheriff Truman. Through Leland, BOB laughed and howled as he was interrogated, soon confessing to the murders. Satisfied, the lawmen left the room.\n \nCooper then went over the evidence against Leland and the events that must have occurred the night of Laura's death. After BOB chanted a poem, the sprinkler system went off, and BOB slammed Leland's head into the interrogation room door. They frantically entered to find Leland bleeding on the floor, BOB apparently gone. Leland expressed his guilt and died in Cooper's arms.\nLater, with Dale and Harry, Albert discussed the nature of BOB. He came to the conclusion that Bob is the \"evil that men do.\"\nReturn of Windom Earle\nFollowing Windom Earle's arrival in Twin Peaks, taunting Cooper, Albert returned to the town. He went to the sheriff's station, where he enthusiastically greeted Truman with a hug before presenting a file on Windom Earle, under Cole's orders. He told them that Earle had been sending boxes to various law enforcement agencies, each containing a piece of wardrobe from his late wife Caroline Earle on their wedding day.\nLater, he confirmed to Cooper through forensic analysis that Josie Packard was the one who shot him and was the prime suspect in the murder of Jonathan Kumagai, though Cooper hoped for Harry's sake that this was not the case, as the sheriff and Josie were lovers.\nHe soon matched up the bullets fired at Cooper, and at Jonathan. 
He showed Cooper and was eager to arrest Josie, but Cooper wanted more evidence to be absolutely sure that she was guilty and then wished to see if she would confess first.\nLater, he came to the station to deliver more evidence of Josie's guilt to Cooper. However, Harry overheard them and left. Josie died later in the night of unknown causes.\nOn April 1, Albert performed an autopsy on Leo Johnson, who had been found dead inside a cabin in the woods. He concluded that Leo had been killed by gunshot wounds to the chest by an assailant who stood \"Bureau style,\" based on scuff marks found by the cabin's door. Due to this evidence, Albert theorized that the killer was Windom Earle.\nRe-emergence of Cooper\nIn September 2016, after Agent Tamara Preston briefed Albert and Gordon of the deaths of Sam Colby and Tracey Barberato, a call was received concerning Agent Cooper, who had been missing since 1989. It was found that he had been incarcerated in a federal prison in South Dakota, and Cole planned to go there with Albert and Preston in tow.\nUpon arrival, they were presented with the contents that were found in Cooper's trunk: cocaine, a machine gun, and a dog leg. They then met with Cooper, who claimed to have been working undercover with Phillip Jeffries and was on his way to debrief with Cole when he had his car accident that led to his incarceration.\nAfter this interview, Cole spoke with Albert privately, having noticed an unusual reaction to Cooper. Albert admitted to him that he had provided Jeffries with information several years before after Jeffries said it was pertinent to Cooper's safety. This information regarded an agent in Colombia, who was killed a week after Albert relayed this information. Albert and Gordon then discussed the meeting with Cooper, both feeling something was wrong. 
Cole asked Albert about a person that they planned to bring to Cooper, and Albert noted that he knew where she drank.\nWith this knowledge, Albert went to Max Von's Bar, where he found Diane Evans, Cooper's former secretary.\nFollowing Diane's interview with Cooper, the group headed back to Philadelphia, but the plane was re-routed to Buckhorn, South Dakota after the body of Major Garland Briggs was found.\nDetective Dave Macklay explained to them that the body had been found in a bed along with the severed head of Ruth Davenport, who, along with her murder's prime suspect, William Hastings, had an interest in alternate dimensions. Albert noted that the body appeared to be the wrong age for Briggs, which caused Cole to connect it to Cooper.\nAfter having dinner with coroner Constance Talbot, Albert showed Gordon texts connected to Diane, pointing suspicion to her possibly being in league with Cooper. Tammy soon showed up, bringing them a picture of Cooper and another man with the glass box at the scene of Sam Colby and Tracey Barberato's deaths.\nHastings led Albert, Gordon, Tammy, and Diane to where he claimed to have entered \"the zone,\" which he claimed to be an alternate dimension. Albert and Gordon both saw \"dirty, bearded men\" and just before Gordon could be transported to \"the zone,\" Albert pulled him back and discovered a headless female body, presuming it to be Ruth Davenport's. He photographed it and noted that it had coordinates written on its arm, as Hastings said about Davenport. 
They returned to Macklay's car when Hastings suddenly died.\nMacklay later confirmed the body as Ruth Davenport's and Albert and Gordon described the bearded men they had seen, which Diane claimed to have also seen exit Macklay's car following Hastings' death.\nAlbert and Gordon inducted Tammy into the Blue Rose task force, then offered Diane an official position in the investigation into Cooper's re-appearance.\nHe later showed Gordon texts to and from Diane, reading \"Las Vegas?\" and \"THEY HAVEN'T ASKED YET.\"\nAlbert explained to Tammy the first Blue Rose case before Cole came to meet with them and Diane. After Diane confirmed that Cooper had mentioned Major Briggs during their last meeting, Albert explained the Major's significance and Diane stated that she recognized the names on the ring found in Briggs' stomach as belonging to her half-sister and brother-in-law, Janey-E and Dougie Jones, who lived in Las Vegas. This prompted Cole to contact the FBI branch in Las Vegas, stating the Joneses to be suspects in a double murder.\nAfter Diane left, Gordon explained that he had contacted the Twin Peaks Sheriff's Department, who had found something from Laura Palmer's diary that indicated the existence of two Coopers. He then described a dream he had about Monica Bellucci, in which he recalled Jeffries' brief appearance at the FBI headquarters in 1989, particularly Jeffries' question of who Cole thought Cooper was.\nDiane later met with Albert, Gordon, and Tammy to recount the evening she last saw Cooper. After describing the events that took place, Diane became upset and drew a gun from her purse. Albert and Tammy reacted in time, drawing their guns and shooting Diane, who then disappeared, prompting Tammy to note that Diane was actually a tulpa.\nAs the agents recovered from this, Cole received word from Las Vegas that Dougie Jones and Dale Cooper were one and the same. 
Tammy researched Dougie Jones, finding that his car had exploded, an assassination attempt had been made on him, and he had been in the company of two organized crime figures. Tammy further stated that he had been electrocuted by sticking a fork in a wall socket, which Albert noted as \"strange, even for Cooper.\" Cole then told them to get ready to leave, as he knew where Cooper was headed.\nThey met Cooper at the Twin Peaks Sheriff's Station, where Cooper's doppelganger and BOB had just been defeated. Cooper told them he wished to meet them again and stated that some things would change, as the past dictated the future.\nBehind the scenes\nAlbert was played by Miguel Ferrer, who later starred in Mark Frost and David Lynch's short-lived television series, On the Air, alongside Ian Buchanan and David L. Lander.\nFerrer posthumously appeared in the 2017 revival following his death on January 19, 2017.\nFerrer was one of three actors from the 1987 film RoboCop to appear in Twin Peaks, the other two being Ray Wise and Dan O'Herlihy.\nGallery\n\nTwin Peaks (1990-1991)\n\nTwin Peaks (2017)\n"}
data/input_docs/Aleister_Crowley.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Aleister_Crowley", "url": "https://twinpeaks.fandom.com/wiki/Aleister_Crowley", "text": "Aleister Crowley\nAleister Crowley was an English mystic whose teachings were the foundation of the Thelema religion. He was often referred to as \"the most evil man in the world.\"\nBiography\nWhile in Egypt, Crowley claimed to receive religious tenets from a higher power, incorporating them into the religion of Thelema. In the process, he supposedly restored knowledge of alchemy, an art lost to the centuries.\nCrowley also visited Tibet, becoming the first Westerner to study with the lamas.\nIn 1923, Crowley published Moonchild in which two groups of magicians fought over an unborn child who was potentially the Antichrist. Crowley himself tried to summon the supposed Moonchild, to no success.\nAfter Jack Parsons became leader of the Pasadena Thelema lodge, Crowley sent him a letter concerning the \"Hell Gate,\" one of seven such gates he believed to exist in the world. He encouraged Parsons to \"make use of\" this information.\nCrowley was a rampant drug user and died in 1944 at the age of 72.\nBehind the scenes\nAleister Crowley (October 12, 1875 \u2013 December 1, 1947) was an English occultist and magician who founded Thelema, a religion he developed after supposedly being visited by an entity named Aiwass."}
data/input_docs/Alexander_Hamilton.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Alexander_Hamilton", "url": "https://twinpeaks.fandom.com/wiki/Alexander_Hamilton", "text": "Alexander Hamilton\nAlexander Hamilton was an American Treasury secretary who was killed in a duel on the New Jersey Palisades by Vice President Aaron Burr.\nBehind the scenes\nAlexander Hamilton (January 11, 1755 or 1757 \u2013 July 12, 1804) was one of the Founding Fathers of the United States and the nation's first Secretary of the Treasury."}
data/input_docs/Alexander_MacKenzie.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Alexander_MacKenzie", "url": "https://twinpeaks.fandom.com/wiki/Alexander_MacKenzie", "text": "Alexander MacKenzie\nAlexander MacKenzie was the first Caucasian to reach the Pacific northwest overland, having done so in 1793.\nBehind the scenes\nAlexander Mackenzie (1764 \u2013 March 12, 1820) was a Scottish explorer most notable for his overland crossing of modern-day Canada."}
data/input_docs/Alice_Brady.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Alice_Brady", "url": "https://twinpeaks.fandom.com/wiki/Alice_Brady", "text": "Alice Brady\n\"Oh, it fell down.\"\nAlice Brady was an employee at a bank where Laura Palmer kept a safety deposit box.\nFollowing Laura's murder, Alice gave FBI Special Agent Dale Cooper and Sheriff Harry S. Truman Laura's safety deposit box, which Laura had kept at the bank for about six months.\nBehind the scenes\nIn the pilot's original script, Alice was a uniformed bank guard. It is unknown if the bank Alice works at is the Twin Peaks Savings and Loan."}
data/input_docs/Alice_Tremond.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Alice_Tremond", "url": "https://twinpeaks.fandom.com/wiki/Alice_Tremond", "text": "Alice Tremond\nAlice Tremond was the owner of the house at 708 Northwestern Street in Twin Peaks, Washington.\nBiography\nAlice purchased her home from its previous owner, Mrs. Chalfont.\nOne evening, Dale Cooper and Carrie Page knocked on Alice's door. Cooper, confused to see Alice greet them, asked if Sarah Palmer was home, but Alice said that she wasn't, and that she didn't know anyone by that name. Turning to someone else in the home for help, Alice answered Cooper's questions about the property's previous owners. Eventually, Cooper apologized for disturbing them at a late hour, and left with Carrie."}
data/input_docs/Allen_K._Boyle.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Allen_K._Boyle", "url": "https://twinpeaks.fandom.com/wiki/Allen_K._Boyle", "text": "Allen K. Boyle\nAllen K. Boyle was a traveling salesman who picked up Dale Cooper outside Bloomsburg, Pennsylvania on July 31, 1969.\nCooper recorded Boyle stating his belief that the sun was dying and that he had a plan for when it died."}
data/input_docs/American_girl%27s_mother.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "American_girl%27s_mother", "url": "https://twinpeaks.fandom.com/wiki/American_girl%27s_mother", "text": "American girl's mother\nThe American girl's mother was heard making a loud banging noise during Dale Cooper's escape from the red room. \nAfter vanishing from the glass box in New York City, Dale Cooper arrived at an unusual \"mansion room,\" where an unseen presence banged on two separate doors, attempting to breach the room. When Cooper left and then returned to that room, he found the American girl sitting before the fireplace, and she implored him to exit through the nearby wall socket before her mother arrived."}
data/input_docs/American_girl.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "American_girl", "url": "https://twinpeaks.fandom.com/wiki/American_girl", "text": "American girl\n\"You'd better hurry, my mother's coming.\"\nAn American girl appeared to Dale Cooper during his escape from the red room.\nBiography\nThe girl resembled an older Ronette Pulaski, whom Cooper had met in 1989 while investigating her kidnapping and Laura Palmer's murder at the hands of BOB.\nAfter his encounter with Naido in the mansion room, Cooper returned to find the girl sitting in Naido's place before the fireplace. When the girl's digital watch struck 2:53, a mechanism in the room's wall resembling a power outlet began to hum and she begged Cooper to go before the arrival of her mother, whose presence was apparent by a banging noise. She additionally told him, \"When you get there, you will already be there.\"\nBehind the scenes\nThe \"American Girl,\" so identified in the end credits of Part 3, was portrayed by Phoebe Augustine, the actress who played Ronette Pulaski in the original Twin Peaks and in Twin Peaks: Fire Walk with Me."}
data/input_docs/An_Introduction_to_David_Lynch.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "An_Introduction_to_David_Lynch", "url": "https://twinpeaks.fandom.com/wiki/An_Introduction_to_David_Lynch", "text": "An Introduction to David Lynch\n\"An Introduction to David Lynch\" is a featurette originally released in the 2001 DVD release, Twin Peaks: The First Season. It was later re-released in the Blu-ray sets, Twin Peaks: The Entire Mystery and Twin Peaks: From Z to A.\nIt features interviews with Catherine E. Coulson, Charles Ramirez-Borg, Lesli Linka Glatter, Miguel Ferrer, Michael J. Anderson, Kyle MacLachlan, Duwayne Dunham, Sheryl Lee, Peggy Lipton, and Richard Beymer, each discussing the filmmaking processes and methods of Twin Peaks co-creator David Lynch.\nvteTwin Peaks home video releasesThe First Season\n\"17 Pieces of Pie\"\n\"An Introduction to David Lynch\"\n\"Learning to Speak in the Red Room\"\n\"Mark Frost Telephone Interview\"\n\"Postcards From The Cast\"\nLog Lady introductions\nThe Second Season\n\"Cast Interview\"\n\"Crew Interview\"\nFire Walk with Me (Criterion)\n\"Reflections on the Phenomenon of Twin Peaks\"\nDefinitive Gold Box Edition\n\"A Slice of Lynch\"\n\"Location Guide\"\n\"Return to Twin Peaks\"\n\"Secrets from Another Place: Creating Twin Peaks\"\n\"Saturday Night Live\"\nGeorgia Coffee commercials\nTwin Peaks Sheriff's Hotline\nThe Entire Mystery\nTwin Peaks: The Missing Pieces\n\"Atmospherics\"\n\"Between Two Worlds\"\n\"Moving Through Time: Fire Walk with Me Memories\"\nA Limited Event Series\nImpressions: A Journey Behind the Scenes of Twin Peaks (The Man with the Gray Elevated Hair\nTell It Martin\nTwo Blue Balls\nThe Number of Completion\nBad Binoculars\nSee You on the Other Side Dear Friend\nDo Not Pick Up Hitchhikers\nA Bloody Finger In Your Mouth\nThe Polish Accountant\nA Pot of Boiling Oil)\nTwin Peaks: The Phenomenon\n\"Behind the Red Curtain\"\n\"I Had Bad Milk in Dehradun\"\n\"A Very Lovely Dream: One Week in Twin Peaks\"\nThe Television CollectionTBAFrom Z to A\nBehind the Curtain\nOn the Couch\nA 
Talk with Kyle MacLachlan and Sheryl Lee\nOthers\"Twin Peaks Festival Greeting\""}
data/input_docs/Andrew_Packard.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Andrew_Packard", "url": "https://twinpeaks.fandom.com/wiki/Andrew_Packard", "text": "Andrew Packard\nAndrew Packard was a sprightly and high-spirited man in spite of his old age, and was formerly the owner of the Packard Sawmill. \nBiography\nAt the age of 16 in 1927, Packard was a Scout First Class in the Boy Scouts. In May, he and fellow scouts Rusty and Theo set out to earn a hiking merit badge whilst supervised by scoutmaster Dwayne Milford. Upon reaching Glastonbury Grove, Rusty and Theo became lightheaded as dark clouds approached the area. Preparing for a storm, Milford led the boys back to their camp.\nThe storm lasted for several hours, during which time, Packard went to retrieve Nehi orange sodas Milford had brought for them and saw lightning strike a nearby Douglas fir. In the flash of lightning, he spotted an extremely tall man looking at him, then vanish into the woods. Initially thinking the man must have been a lumberjack, he returned to camp with the sodas and told the scouts of the man he saw.\nThroughout the night, Packard heard thudding outside of the tent until he managed to fall asleep. In the morning, Milford found a large footprint and told Packard of a camping trip his brother, Douglas went on six months prior, where he claimed to have seen a giant, walking owl.\nBy 1947, he was a scoutmaster and was also involved in several organizations in Twin Peaks, including the Rotary, Chamber of Commerce, Optimists Club, Elk Lodge, and Masonic Lodge.\nIn 1948, Packard took control of the Packard Sawmill.\nIn 1983, he met Josette Mai Wong at a state-sponsored black-tie \"mixer\" at the Hong Kong trade center. He brought her to his hometown of Twin Peaks, where she accepted his proposal of marriage.\nA boating enthusiast, he named his 1936 Chris-Craft Sportsman \"JOSIE.\" On September 27, 1987, the boat exploded, leaving unidentifiable human remains. 
Anticipating the attempt on his life by Thomas Eckhardt\u2014who Josie had ties with\u2014Packard managed to escape and go into hiding, with the town believing he had perished.\nIn March 1989, after listening in on a conversation between his sister Catherine and Josie about his apparent death, Andrew entered Catherine's office and commented that everything was going exactly as planned, and a trap had been set for Eckhardt.\nDays later, Catherine revealed Andrew to her husband Pete, who was flabbergasted at his survival. Catherine and Andrew explained the latter's survival, as his death was faked due to them foreseeing Eckhardt's plan to murder him.\nPete brought Andrew breakfast a couple of mornings later in the shape of a dog's face, much to his amusement. Catherine, however, was not amused. Pete brought salt and pepper, then left. Josie then arrived and fainted upon seeing Andrew.\nHe later had a drink with Josie and revealed to her that he had long since forgiven her for his attempted murder, putting all blame on Eckhardt, who he implored her to visit. He exited, saying they would never speak again.\nPackard went to the Great Northern Hotel, where he revealed himself to Eckhardt as being alive. He also warned him to be careful around Josie, who subsequently killed Eckhardt and died soon after.\nIn the following days, Catherine told Andrew about a box left by Eckhardt's assistant, Jones, which appeared to be a sort of puzzle box with an astrological code on it. He informed her that her investors for the Ghostwood Development Project were on board and he pointed to constellations on the box, putting in a sort of combination, revealing a smaller box, which he busted open with a rolling pin. Contained inside was a metallic box.\nAndrew and Pete attempted to open the final box the next day but were unsuccessful until Packard angrily shot it with a revolver. 
Inside it was a safety deposit box key, which Catherine placed in the cake saver.\nHe took the key from the cake saver later at night and replaced it with another key. Pete then came to bid him goodnight.\nLater, he and Pete went to the Twin Peaks Savings and Loan, where clerk Dell Mibbler was shocked upon seeing him alive and well. Mibbler led them to a safety deposit box, past Audrey Horne, who was chained to the vault door out of civil disobedience. Upon opening the box, a bomb planted by Eckhardt was triggered, killing Andrew and Pete.\nBehind the scenes\nAndrew Packard was played by Irish actor Dan O'Herlihy, who was one of three actors from the 1987 film RoboCop to appear in Twin Peaks, the other two being Ray Wise and Miguel Ferrer.\nTrivia\nAndrew has the distinction of being the only character in the original series to say the word \"shit.\" However, it was censored by the bomb in order to adhere to television regulations."}
data/input_docs/Andrews.json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Andrews", "url": "https://twinpeaks.fandom.com/wiki/Andrews", "text": "Andrews\nAndrews was a prison guard at the Washington State Prison."}
data/input_docs/Andy_(Bryn_Mawr).json ADDED
@@ -0,0 +1 @@
 
1
+ {"name": "Andy_(Bryn_Mawr)", "url": "https://twinpeaks.fandom.com/wiki/Andy_(Bryn_Mawr)", "text": "Andy (Bryn Mawr)\nThis is an article about the character from The Autobiography of Dale Cooper: My Life, My Tapes. For the character of the same name from the TV series, see Andy Brennan.\nAndy was a college love interest of Dale Cooper.\nBiography\nCooper first met Andy while meeting students from Bryn Mawr College. However, he drank too much to remember even for sure what her hair color was but was determined to meet her again.\nThe two encountered each other a month later after Cooper was hit in the head by a field hockey ball hit by Andy. When he came to, they talked a little bit and decided to meet at the homecoming bonfire that was being held the next night.\nDale and Andy met at the bonfire and had a sexual encounter in a spot among large trees and shaded from the moon and they rolled into a small body of water. After getting back into their clothes, Andy informed Cooper that she would be leaving for an exchange trip to Holland in the morning and said to not follow her, as her husband would be meeting her at the airport, this being her first revelation to him that she was married.\nAndy returned early the next year without her husband and gave Cooper a book on the Kama Sutra. She later joined Cooper in a motel room for a \"study on the limitations of the mind and body.\"\nWeeks later, Andy received word that her husband had become injured in Holland and decided to return and she broke off her affair with Cooper."}