T-Flet committed on
Commit
e8c8660
1 Parent(s): 30f68b0

Cleaned the Wookieepedia scrapes further; updated the app (currently not using the agent version but the Wookieepedia retriever one)

Browse files
Files changed (3) hide show
  1. explore.ipynb +103 -85
  2. serve.py +20 -10
  3. wookieepedia.ipynb +0 -0
explore.ipynb CHANGED
@@ -103,20 +103,21 @@
103
  "\n",
104
  "# Separately, no need to pay for OpenAIEmbeddings; additionally, all-MiniLM-L6-v2 is default in docs\n",
105
  "\n",
106
- "REGENERATE_DATABASE = False\n",
107
  "\n",
108
  "if (db_exists := os.path.exists(db_dir := r'scripts\\db')):\n",
109
- " if REGENERATE_DATABASE:\n",
110
  " print('Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)')\n",
111
  " shutil.rmtree(db_dir)\n",
112
  " else: script_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
113
  "\n",
114
- "if not db_exists or (db_exists and REGENERATE_DATABASE): # Unfortunate disjoining of the two conditional blocks\n",
115
- " scripts = DirectoryLoader('scripts', glob = '**/[!.]*.txt', loader_cls = TextLoader).load()\n",
116
  " for s in scripts: s.page_content = re.sub(r'\\t+|[ ]{2,}', '', s.page_content) # Spacing to centre text noise\n",
117
  "\n",
118
- " script_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200).split_documents(scripts)\n",
119
  " # Why not some overlap for extra context just in case?\n",
 
120
  "\n",
121
  " script_db = Chroma.from_documents(script_chunks, SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
122
  "\n",
@@ -166,16 +167,17 @@
166
  },
167
  {
168
  "cell_type": "code",
169
- "execution_count": 6,
170
  "metadata": {},
171
  "outputs": [
172
  {
173
  "name": "stdout",
174
  "output_type": "stream",
175
  "text": [
176
- "The Wookieepedia database contains 10615 chunks, with mean length of 736 characters\n",
 
177
  "Current source pages in Wookieepedia db:\n",
178
- "395\n"
179
  ]
180
  },
181
  {
@@ -196,7 +198,6 @@
196
  " 'wookieepedia\\\\Amee.txt',\n",
197
  " 'wookieepedia\\\\Amidalans.txt',\n",
198
  " 'wookieepedia\\\\Anakin_Skywalker.txt',\n",
199
- " 'wookieepedia\\\\Ankanksha_Sahu.txt',\n",
200
  " 'wookieepedia\\\\Area_D-512.txt',\n",
201
  " 'wookieepedia\\\\Ask_Aak.txt',\n",
202
  " 'wookieepedia\\\\Assembly.txt',\n",
@@ -574,11 +575,10 @@
574
  " 'wookieepedia\\\\Yoda.txt',\n",
575
  " 'wookieepedia\\\\Younglings_(comic).txt',\n",
576
  " 'wookieepedia\\\\Zam.txt',\n",
577
- " 'wookieepedia\\\\Zev.txt',\n",
578
- " 'wookieepedia\\\\log\\\\wookiepedia_successful_scrapes.txt'}"
579
  ]
580
  },
581
- "execution_count": 6,
582
  "metadata": {},
583
  "output_type": "execute_result"
584
  }
@@ -593,11 +593,11 @@
593
  " else: woo_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
594
  "\n",
595
  "if not db_exists or (db_exists and REGENERATE_WOOKIEEPEDIA_DATABASE): # Unfortunate disjoining of the two conditional blocks\n",
596
- " pages = DirectoryLoader('wookieepedia', glob = '**/[!.]*.txt', loader_cls = TextLoader).load()\n",
597
- " for s in pages: s.page_content = re.sub(r'\\t+|[ ]{2,}', '', s.page_content) # Spacing to centre text noise\n",
598
  "\n",
599
- " page_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200).split_documents(pages)\n",
600
  " # Why not some overlap for extra context just in case?\n",
 
601
  "\n",
602
  " woo_db = Chroma.from_documents(page_chunks, SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
603
  "\n",
@@ -610,25 +610,25 @@
610
  },
611
  {
612
  "cell_type": "code",
613
- "execution_count": 7,
614
  "metadata": {},
615
  "outputs": [
616
  {
617
  "data": {
618
  "text/plain": [
619
- "[Document(page_content='Vader telling Luke that he is his father\\nVader beckoned Luke to join him in the dark side, but Luke vehemently refused. Vader then told Luke that Kenobi had hidden from him the truth about his father, Anakin Skywalker, before revealing that he was, in fact, his father. Luke refused to believe the truth, but Vader continued to tempt his weakened son, offering Luke the chance to destroy the Emperor and \"bring order to the galaxy,\"[13] just as he tried to do with Padmé on Mustafar.[11] He even pleaded with his son to come with him.[13] In Vader\\'s ideal world, his son would have taken his hand, accepting him as his father.[384] However, Skywalker instead chose to throw himself into the shaft, possibly facing death to avoid joining Vader.[13]', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
620
- " Document(page_content=\"into murdering his father so they could rule the galaxy together.[4] While trying to steer Bridger, he displayed himself in his public personality via hologram, showing himself as benign and polite. Once Bridger frustrated his plans, his true malignant self was revealed as he had underestimated the boy's hatred for the Empire and devotion to the Rebellion and new-found rebel family.[193] Likewise, when Sidious tried to motivate Luke into killing his father upon momentarily embracing the dark side of the Force, the young Skywalker desisted due to his compassion for his father; his Jedi teachings.[4]\", metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n",
621
- " Document(page_content=\"―Darth Vader and Luke Skywalker\\xa0—Listen (file info)[9]\\nThe revelation that Vader was Skywalker's father shook the Jedi hopeful to his core.\\nSeeing that Skywalker was vulnerable in his weakened state, Vader offered to show Skywalker the power of the dark side and complete his training, but Skywalker adamantly refused to ever join Vader, the man who had killed his father. In that moment, Vader revealed the truth: that he was his father.[9]\", metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n",
622
- " Document(page_content='One last duel\\n\"Your thoughts betray you, Father. I feel the good in you, the conflict.\"\"There is no conflict.\"\"You couldn\\'t bring yourself to kill me before, and I don\\'t believe you\\'ll destroy me now.\"\"You underestimate the power of the dark side. If you will not fight, then you will meet your destiny!\"\\n―Luke Skywalker and Darth Vader[4]\\nFather and son fight as the Emperor watches.', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
623
- " Document(page_content='Luke surrendered himself to attempt to turn his Father to the light.\\nRestless after sensing his son,[39] Vader returned to his master\\'s side and reported the shuttle\\'s arrival and his son\\'s presence with it. The Emperor allowed him to land on Endor at the shield generator base, where he was to await Luke, who according to the Emperor, would come to him. He complied with his orders and landed on Endor with the Lambda-class T-4a shuttle ST 321; as predicted by Sidious, during the night Luke surrendered himself in an attempt to talk to him and bring him back to the light side. Although Vader complimented his son\\'s skills with the Force and his skills in building a lightsaber, he refused his son\\'s pleas. Rebuffed, Luke said his father\\'s identity as Anakin Skywalker was \"truly dead\" as Vader sent Luke to the Emperor.[4]\\nOne last duel', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
624
- " Document(page_content=\"The secret of Skywalker's parentage was made known to him by Vader during their duel in Cloud City.\\nIgnoring Kenobi and Yoda's protests but promising to one day return, Skywalker traveled to Cloud City on the world of Bespin, where he dueled and was bested by Vader. After losing his hand and lightsaber during the duel, Skywalker was horrified when the Sith Lord then revealed he was his father, choosing to fall down an air shaft rather than agree to Vader's offer to rule the galaxy with him. After being rescued by his friends—who had lost Solo after the captain had been frozen in carbonite and taken away by Boba Fettaboard the Millennium Falcon,[2] Skywalker was left in stunned silence over the truth of his heritage, not even noticing that Organa introduced his name to Baron Administrator Landonis Balthazar Calrissian.[179]\", metadata={'source': 'wookieepedia\\\\Jedi.txt'}),\n",
625
- " Document(page_content='Parenthood\\n\"Did you bring me anything of value, bounty hunter?\"\"Not much. Just his name. Skywalker.\"\\n―Darth Vader and Boba Fett[218]\\nVader, realizing he has a son, cracks the viewport in anger.\\nTroubled, Vader returned to his ship. Aboard the Nubian, Aphra told him that Fett wanted to report in during Vader\\'s absence. Subsequently, Vader traveled to a Star Destroyer to meet with the bounty hunter and hear his story. The Dark Lord was disappointed to hear that Fett had lost the boy, but before leaving, Fett told him his name: Luke Skywalker. In his solitude, Vader remembered Amidala telling him about her pregnancy, and Palpatine\\'s half-truth, and cracked the viewport in his anger. Thereupon, he used his meditation chamber\\'s holoprojector to contact his master, but chose to say nothing about his son; rather, Vader assured him that he would not fail and ended the transmission. Nevertheless, he accepted Luke as his son and vowed that the boy would be his.[218]', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
626
- " Document(page_content=\"Being the Emperor's Fist and acting as de facto Commander-in-Chief of the Imperial Military despite not holding a military rank, Vader enforced the rule of the New Order as the Emperor's Sith apprentice throughout most of the Imperial Era. In the aftermath of killing Kenobi in their third rematch and the Battle of Yavin in 0 BBY, he discovered the existence of his son and was determined to turn Luke to the dark side. Luke sought to become a Jedi, like his father before him, and believed that Vader had the potential to turn back to the light side of the Force. Vader was defeated by Luke during the Battle of Endor in 4 ABY, but the young Jedi refused to strike down his father in anger, causing the Emperor to torture Luke with Force lightning. The pain inflicted on his son awakened the part of Vader that was still Anakin, resulting in a redeemed Skywalker killing Sidious at the cost of his own life. Having destroyed the Sith and fulfilled his destiny as the Chosen One, Skywalker made\", metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
627
- " Document(page_content='Vader\\'s departure\\n\"Luke, you can destroy the Emperor. He has foreseen this. It is your destiny. Join me, and together, we can rule the galaxy as father and son!\"\\n―Darth Vader, to Luke Skywalker[16]\\nDarth Vader eventually lured Luke Skywalker to Cloud City on Bespin, where the Sith Lord attempted to capture Skywalker in carbonite and transport the Jedi-aspirant to the Emperor. However, Skywalker evaded capture and was confronted by Vader in a duel on Cloud City. There, with the Jedi-aspirant beaten by a precarious ledge, Vader divulged knowledge of his relationship with Skywalker, his son, and gave an ultimatum for the youth: to join forces—as said to be destiny—with his father, Darth Vader, or die. However, Luke Skywalker rejected his newfound father, and allowed himself to fall into the abyss.[16]', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n",
628
- " Document(page_content='Vader re-entered the fray and forced Skywalker back onto the catwalk. Sensing that Skywalker was weakened, the Sith Lord demanded that Skywalker surrender, lest he be destroyed as Obi-Wan Kenobi had been. Skywalker refused and desperately tried to keep fighting Vader, to which Vader responded with several ferocious strikes, ultimately forcing Skywalker to the very edge of the catwalk and slicing off his right hand. Both Skywalker\\'s hand and his lightsaber fell into the depths of the reactor shaft.[9]\\nA dark revelation\\n\"Obi-Wan never told you what happened to your father.\"\"He told me enough! He told me you killed him!\"\"No. I am your father.\"\"No...that\\'s not true! That\\'s impossible!\"\"Search your feelings. You know it to be true.\"\"No!\"\\n―Darth Vader and Luke Skywalker\\xa0—Listen (file info)[9]\\nThe revelation that Vader was Skywalker\\'s father shook the Jedi hopeful to his core.', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'})]"
629
  ]
630
  },
631
- "execution_count": 7,
632
  "metadata": {},
633
  "output_type": "execute_result"
634
  }
@@ -644,9 +644,29 @@
644
  },
645
  {
646
  "cell_type": "code",
647
- "execution_count": 130,
648
  "metadata": {},
649
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
  "source": [
651
  "# Functions for possible interactive Wookieepedia querying and storing in the db\n",
652
  "\n",
@@ -663,28 +683,37 @@
663
  "# first_wookieepedia_result('Darth Plagueis')\n",
664
  "\n",
665
  "\n",
666
- "def get_new_wookieepedia_chunks(query: str, previous_sources: set[str]) -> list[Document]:\n",
667
- " '''Retrieve and return chunks of the content of the first result of query on Wookieepedia, then return the closest matches for.\n",
668
  " '''\n",
669
  " url = first_wookieepedia_result(query)\n",
670
  "\n",
671
- " if url in previous_sources: return []\n",
672
  " else:\n",
673
- " doc = WebBaseLoader(url).load()[0] # Only one url passed in => only one Document out; no need to assert\n",
674
- " \n",
675
- " # There probably is a very long preamble before the real content, however, if more than one gap then ignore and proceed with full document\n",
676
- " trimmed = parts[1] if len(parts := doc.page_content.split('\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\xa0 \\xa0')) == 2 else doc.page_content\n",
677
- " doc.page_content = re.sub(r'[\\n\\t]{2,}', '\\n', trimmed) # And remove excessive spacing\n",
678
  "\n",
679
- " return RecursiveCharacterTextSplitter(chunk_size = 800, chunk_overlap = 100).split_documents([doc])\n",
 
 
 
 
 
680
  "\n",
681
- "# get_wookieepedia_chunks('Darth Plagueis', set())\n",
 
 
 
682
  "\n",
683
  "\n",
684
  "def get_wookieepedia_context(original_query: str, simple_query: str, wdb: Chroma) -> list[Document]:\n",
685
  " try:\n",
686
- " new_chunks = get_new_wookieepedia_chunks(simple_query, previous_sources = set(md.get('source') for md in wdb.get()['metadatas']))\n",
687
- " if new_chunks: wdb.add_documents(new_chunks)\n",
 
 
 
688
  " except: return []\n",
689
  "\n",
690
  " return wdb.similarity_search(original_query, k = 10)\n",
@@ -702,7 +731,7 @@
702
  },
703
  {
704
  "cell_type": "code",
705
- "execution_count": 8,
706
  "metadata": {},
707
  "outputs": [],
708
  "source": [
@@ -720,7 +749,7 @@
720
  },
721
  {
722
  "cell_type": "code",
723
- "execution_count": 9,
724
  "metadata": {},
725
  "outputs": [],
726
  "source": [
@@ -753,7 +782,7 @@
753
  },
754
  {
755
  "cell_type": "code",
756
- "execution_count": 10,
757
  "metadata": {},
758
  "outputs": [],
759
  "source": [
@@ -780,7 +809,7 @@
780
  },
781
  {
782
  "cell_type": "code",
783
- "execution_count": 11,
784
  "metadata": {},
785
  "outputs": [],
786
  "source": [
@@ -806,21 +835,21 @@
806
  },
807
  {
808
  "cell_type": "code",
809
- "execution_count": 13,
810
  "metadata": {},
811
  "outputs": [
812
  {
813
  "data": {
814
  "text/plain": [
815
  "{'input': \"Who participates in Han's rescue from Jabba? And where is the palace?\",\n",
816
- " 'context': [Document(page_content=\"After the smuggler Han Solo failed to repay him for lost cargo, Jabba placed a high price on his head. Solo was eventually delivered to him by one of his bounty hunters, Boba Fett, as a gift from Darth Vader. However, this capture brought him to the attention of Jedi Knight Luke Skywalker, who sought to rescue his friend from Jabba's imprisonment. As he attempted to execute the Jedi and his allies in the Great Pit of Carkoon, Jabba was choked to death by Leia Organa. With the Hutts unable to decide who would inherit Jabba's criminal ventures, many of his slaves, including the Niktos, were free, and his palace was occupied by his former Majordomo Bib Fortuna, who took his place as Daimyo of Tatooine until Fett killed and usurped him. Fett sought to rebuild Jabba's criminal empire in his own image, intending to rule with respect rather than the fear that the Hutt instilled in his followers.\\nContents\\n1 Biography\\n1.1 Tatooine Crime Lord\\n1.2 The High Republic\\n1.2.1 The Hynestian Treaty\", metadata={'source': 'wookieepedia\\\\Jabba_Desilijic_Tiure.txt'}),\n",
817
- " Document(page_content=\"On a distant spaceport, Mira manages to escape from the stormtroopers who were guarding her and sneak into the Millennium Falcon, which was being loading with spice. Han Solo and Chewbacca are going to make another Kessel Run for Jabba the Hutt, but first he was going to pick up Luke Skywalker. On the hidden Rebel base on the planet Randa, Han picks up Luke, Artoo and Threepio and finds his unexpected guest. Mira tells Luke and Han about Bzorn's plan and asks Han to deliver her to the professor, knowing that he will stop the destruction after he learns that Mira is safe.\", metadata={'source': 'wookieepedia\\\\The_Second_Kessel_Run.txt'}),\n",
818
- " Document(page_content=\"As Bo-Katan Kryze returned to her forces without the Darksaber, her Mandalorians abandon her and become privateers under Axe Woves.[926]\\nThe Jedi Luke Skywalker beckons Grogu to train as a Jedi with him. Removing his Mandalorian helmet, Djarin gives his blessing to Grogu before letting the latter go.[925]\\nLuke Skywalker builds a Jedi Temple on Ossus.[927] After a brief time with Skywalker there, Grogu decides to return to Din Djarin.[928]Jabba's Palace under new management\\nBoba Fett and Fennec Shand attack Jabba's Palace, replacing Bib Fortuna's place as master of the palace.[925]\\nA gang war erupts on Tatooine, with Daimyo Boba Fett's Fett gotra defeating the Pyke Syndicate presence on the world and emerging as its leading criminal syndicate.[928]\\nc. 9 ABY\", metadata={'source': 'wookieepedia\\\\Timeline_of_galactic_history.txt'}),\n",
819
- " Document(page_content=\"Aboard the shuttle Tydirium, Leia Organa, Nien Nunb, and Kidi Aleri reach Kothlis, where they join Luke Skywalker. Skywalker reports that they had gathered that Han Solo is still in carbonite, held at Jabba's Palace.[680]\\nIden Versio escapes from the Invincible Faith with Moff Derrek Raythe's message, in which Death Squadron is ordered to move to the far side of Endor as part of the Empire's plans to trap the Rebel Alliance during what the rebels think to be their surprise assault on the Emperor's new battle station.[876]\", metadata={'source': 'wookieepedia\\\\Timeline_of_galactic_history.txt'})],\n",
820
- " 'answer': \"Han's rescue from Jabba the Hutt's palace involves a team consisting of Luke Skywalker, Leia Organa, Chewbacca, Lando Calrissian, R2-D2, and C-3PO. The palace is located on the desert planet of Tatooine. The rescue mission is a pivotal moment in the Star Wars saga, showcasing the bravery and unity of the Rebel Alliance against the oppressive forces of the Galactic Empire.\"}"
821
  ]
822
  },
823
- "execution_count": 13,
824
  "metadata": {},
825
  "output_type": "execute_result"
826
  }
@@ -835,6 +864,23 @@
835
  "))"
836
  ]
837
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
838
  {
839
  "cell_type": "markdown",
840
  "metadata": {},
@@ -844,7 +890,7 @@
844
  },
845
  {
846
  "cell_type": "code",
847
- "execution_count": 15,
848
  "metadata": {},
849
  "outputs": [],
850
  "source": [
@@ -870,35 +916,7 @@
870
  },
871
  {
872
  "cell_type": "code",
873
- "execution_count": 17,
874
- "metadata": {},
875
- "outputs": [
876
- {
877
- "data": {
878
- "text/plain": [
879
- "'Darth Plagueis Wise'"
880
- ]
881
- },
882
- "execution_count": 17,
883
- "metadata": {},
884
- "output_type": "execute_result"
885
- }
886
- ],
887
- "source": [
888
- "simplify_query_prompt = ChatPromptTemplate.from_messages([\n",
889
- " ('system', 'Given the above conversation, generate a search query to find a relevant page in the Star Wars fandom wiki; the query should be something simple, at most 4 words, such as the name of a character, place, event, item, etc.'),\n",
890
- " MessagesPlaceholder('chat_history', optional = True), # Using this form since not clear how to have optional = True in the tuple form\n",
891
- " ('human', '{query}')\n",
892
- "])\n",
893
- "\n",
894
- "simplify_query_chain = simplify_query_prompt | llm | StrOutputParser() # To extract just the message\n",
895
- "\n",
896
- "# simplify_query_chain.invoke(dict(context = 'You are an expert of Star Wars lore', query = 'Do you know the tragedy of Darth Plagueis the Wise?'))"
897
- ]
898
- },
899
- {
900
- "cell_type": "code",
901
- "execution_count": 19,
902
  "metadata": {},
903
  "outputs": [
904
  {
@@ -1016,7 +1034,7 @@
1016
  "and supplies in all directions. Suddenly they stop. Then \n",
1017
  "everything is quiet for a few moments. A great howling moan \n",
1018
  "is heard echoing throughout the canyon which sends the \n",
1019
- "Sandpeople fleeing in terror.\u001b[0m\u001b[32;1m\u001b[1;3mThe dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. The tragedy of Darth Plagueis the Wise is that he was eventually betrayed and killed by his own apprentice, who was seeking to gain his power. This story plays a significant role in Anakin's fall to the dark side as he becomes intrigued by the idea of cheating death, which ultimately leads him to become Darth Vader.\u001b[0m\n",
1020
  "\n",
1021
  "\u001b[1m> Finished chain.\u001b[0m\n"
1022
  ]
@@ -1027,10 +1045,10 @@
1027
  "{'chat_history': [HumanMessage(content='Are you knowledgeable about Star Wars?'),\n",
1028
  " AIMessage(content='Very')],\n",
1029
  " 'input': 'Do you know the tragedy of Darth Plagueis the Wise?',\n",
1030
- " 'output': 'The dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. The tragedy of Darth Plagueis the Wise is that he was eventually betrayed and killed by his own apprentice, who was seeking to gain his power. This story plays a significant role in Anakin\\'s fall to the dark side as he becomes intrigued by the idea of cheating death, which ultimately leads him to become Darth Vader.'}"
1031
  ]
1032
  },
1033
- "execution_count": 19,
1034
  "metadata": {},
1035
  "output_type": "execute_result"
1036
  }
 
103
  "\n",
104
  "# Separately, no need to pay for OpenAIEmbeddings; additionally, all-MiniLM-L6-v2 is default in docs\n",
105
  "\n",
106
+ "REGENERATE_SCRIPT_DATABASE = False\n",
107
  "\n",
108
  "if (db_exists := os.path.exists(db_dir := r'scripts\\db')):\n",
109
+ " if REGENERATE_SCRIPT_DATABASE:\n",
110
  " print('Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)')\n",
111
  " shutil.rmtree(db_dir)\n",
112
  " else: script_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
113
  "\n",
114
+ "if not db_exists or (db_exists and REGENERATE_SCRIPT_DATABASE): # Unfortunate disjoining of the two conditional blocks\n",
115
+ " scripts = DirectoryLoader('scripts', glob = '*.txt', loader_cls = TextLoader).load()\n",
116
  " for s in scripts: s.page_content = re.sub(r'\\t+|[ ]{2,}', '', s.page_content) # Spacing to centre text noise\n",
117
  "\n",
118
+ " script_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, separators = ['\\n\\n\\n', '\\n\\n', '\\n']).split_documents(scripts)\n",
119
  " # Why not some overlap for extra context just in case?\n",
120
+ " # Also, no need for fancier sentence or semantic splitting in this highly-formatted text\n",
121
  "\n",
122
  " script_db = Chroma.from_documents(script_chunks, SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
123
  "\n",
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": 4,
171
  "metadata": {},
172
  "outputs": [
173
  {
174
  "name": "stdout",
175
  "output_type": "stream",
176
  "text": [
177
+ "Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)\n",
178
+ "The Wookieepedia database contains 10838 chunks, with mean length of 645 characters\n",
179
  "Current source pages in Wookieepedia db:\n",
180
+ "393\n"
181
  ]
182
  },
183
  {
 
198
  " 'wookieepedia\\\\Amee.txt',\n",
199
  " 'wookieepedia\\\\Amidalans.txt',\n",
200
  " 'wookieepedia\\\\Anakin_Skywalker.txt',\n",
 
201
  " 'wookieepedia\\\\Area_D-512.txt',\n",
202
  " 'wookieepedia\\\\Ask_Aak.txt',\n",
203
  " 'wookieepedia\\\\Assembly.txt',\n",
 
575
  " 'wookieepedia\\\\Yoda.txt',\n",
576
  " 'wookieepedia\\\\Younglings_(comic).txt',\n",
577
  " 'wookieepedia\\\\Zam.txt',\n",
578
+ " 'wookieepedia\\\\Zev.txt'}"
 
579
  ]
580
  },
581
+ "execution_count": 4,
582
  "metadata": {},
583
  "output_type": "execute_result"
584
  }
 
593
  " else: woo_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
594
  "\n",
595
  "if not db_exists or (db_exists and REGENERATE_WOOKIEEPEDIA_DATABASE): # Unfortunate disjoining of the two conditional blocks\n",
596
+ " pages = DirectoryLoader('wookieepedia', glob = '*.txt', loader_cls = TextLoader).load()\n",
 
597
  "\n",
598
+ " page_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, separators = ['\\n\\n\\n', '\\n\\n', '\\n']).split_documents(pages)\n",
599
  " # Why not some overlap for extra context just in case?\n",
600
+ " # Also, no need for fancier sentence or semantic splitting in this highly-formatted text\n",
601
  "\n",
602
  " woo_db = Chroma.from_documents(page_chunks, SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n",
603
  "\n",
 
610
  },
611
  {
612
  "cell_type": "code",
613
+ "execution_count": 5,
614
  "metadata": {},
615
  "outputs": [
616
  {
617
  "data": {
618
  "text/plain": [
619
+ "[Document(page_content='Vader telling Luke that he is his father \\nVader beckoned Luke to join him in the dark side, but Luke vehemently refused. Vader then told Luke that Kenobi had hidden from him the truth about his father, Anakin Skywalker, before revealing that he was, in fact, his father. Luke refused to believe the truth, but Vader continued to tempt his weakened son, offering Luke the chance to destroy the Emperor and \"bring order to the galaxy,\" just as he tried to do with Padmé on Mustafar. He even pleaded with his son to come with him. In Vader\\'s ideal world, his son would have taken his hand, accepting him as his father. However, Skywalker instead chose to throw himself into the shaft, possibly facing death to avoid joining Vader.', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
620
+ " Document(page_content='\"Obi-Wan never told you what happened to your father.\"\"He told me enough! He told me you killed him!\"\"No. I am your father.\"\"No...that\\'s not true! That\\'s impossible!\"\"Search your feelings. You know it to be true.\"\"No!\"\\n―Darth Vader and Luke Skywalker\\xa0— Listen (file info)\\n The revelation that Vader was Skywalker\\'s father shook the Jedi hopeful to his core. \\nSeeing that Skywalker was vulnerable in his weakened state, Vader offered to show Skywalker the power of the dark side and complete his training, but Skywalker adamantly refused to ever join Vader, the man who had killed his father. In that moment, Vader revealed the truth: that he was his father.', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n",
621
+ " Document(page_content='\"Your thoughts betray you, Father. I feel the good in you, the conflict.\"\"There is no conflict.\"\"You couldn\\'t bring yourself to kill me before, and I don\\'t believe you\\'ll destroy me now.\"\"You underestimate the power of the dark side. If you will not fight, then you will meet your destiny!\"\\n―Luke Skywalker and Darth Vader\\n Father and son fight as the Emperor watches.', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n",
622
+ " Document(page_content=\"Sidious manipulated Skywalker into a battle with his father, seeking to replace Vader with a younger apprentice. \\nLuke was then manipulated by the Emperor, who told him of his friends' impending doom and Luke was told that he would soon become his apprentice. Luke at first resisted, but as the Emperor continued to taunt him, Luke grew angrier and then grabbed his lightsaber to strike the old man down. Darth Vader intercepted Luke's strike and the two began to engage in a duel much to the Emperor's delight.\\nLuke was first able to calm himself, refusing several times to continue fighting his father. Luke attempted to hide from Vader as their fight became more intense but Vader was able to sense his son's thoughts and learned that Luke had a sister. With this knowledge, Vader told Luke that if he did not turn to the dark side, perhaps Leia would. The threat of losing his sister enraged Luke and drove him to angrily attack Vader.\", metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n",
623
+ " Document(page_content='Luke battles the Knights of Ren', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n",
624
+ " Document(page_content='\"Luke, you can destroy the Emperor. He has foreseen this. It is your destiny. Join me, and together, we can rule the galaxy as father and son!\"\\n―Darth Vader, to Luke Skywalker\\nDarth Vader eventually lured Luke Skywalker to Cloud City on Bespin, where the Sith Lord attempted to capture Skywalker in carbonite and transport the Jedi-aspirant to the Emperor. However, Skywalker evaded capture and was confronted by Vader in a duel on Cloud City. There, with the Jedi-aspirant beaten by a precarious ledge, Vader divulged knowledge of his relationship with Skywalker, his son, and gave an ultimatum for the youth: to join forces—as said to be destinywith his father, Darth Vader, or die. However, Luke Skywalker rejected his newfound father, and allowed himself to fall into the abyss.', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n",
625
+ " Document(page_content='Luke Skywalker after the Battle of Yavin', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n",
626
+ " Document(page_content=\"\\nThe son of Jedi Knight Anakin Skywalker and Senator Padmé Amidala, Luke Skywalker was born along with his twin sister, Leia, in 19 BBY. As a result of Amidala's death and Anakin's fall to the dark side of the Force, the Skywalker children were separated and sent into hiding, with Leia adopted by the royal family of Alderaan while Luke was raised by his relatives on Tatooine. Longing for a life of adventure and purpose, Skywalker joined the Rebellion and began learning the ways of the Force under the guidance of Jedi Master Obi-Wan Kenobi, whose first apprentice was Luke's own father. During the Battle of Yavin in 0 BBY, Skywalker saved the Alliance from annihilation by destroying the Empire's planet-killing superweapon, the Death Star. He continued his training in the years that followed, determined to become a Jedi Knight like his father before him, and found a new mentor in Grand Master Yoda. After his master's death, Skywalker participated in the Battle of Endor in 4 ABY, during which he confronted the Sith Lord Darth Vader, whom he learned was in fact his father, Anakin Skywalker. With Luke's help, Anakin returned to the light side of the Force by killing the Emperor and Dark Lord of the Sith Darth Sidious at the cost of his own life, fulfilling his destiny as the Chosen One. Following the Battle of Endor, Skywalker trained his sister as a first Jedi apprentice on Ajan Kloss, but Leia ended her training after the birth of her son and seeing his death as a result of finishing her knighthood.\", metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n",
627
+ " Document(page_content='Meeting Luke Skywalker', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n",
628
+ " Document(page_content=\"After Luke cut off Vader's mechanical hand, Sidious betrayed Vader by instructing Luke to take Vader's place by his side, much like how he betrayed Dooku and had him replaced with Vader. However, Luke began to see what had happened and that he was dangerously close to becoming like his father was now; a slave to the dark side of the Force. Luke threw away his lightsaber and turned to face the Emperor. He told the Emperor that he would never turn to the dark side and that he was a Jedi, like his father before him.\", metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'})]"
629
  ]
630
  },
631
+ "execution_count": 5,
632
  "metadata": {},
633
  "output_type": "execute_result"
634
  }
 
644
  },
645
  {
646
  "cell_type": "code",
647
+ "execution_count": 20,
648
  "metadata": {},
649
+ "outputs": [
650
+ {
651
+ "data": {
652
+ "text/plain": [
653
+ "[Document(page_content='The Tragedy of Darth Plagueis the Wise', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
654
+ " Document(page_content='\"Darth Plagueis was a Dark Lord of the Sith so powerful and so wise, he could use the Force to influence the midi-chlorians to create life. He had such a knowledge of the dark side, he could even keep the ones he cared about from dying.\"\\n―Sheev Palpatine', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
655
+ " Document(page_content='The death of Darth Plagueis\\n\\n\"Unfortunately, he taught his apprentice everything he knew. Then his apprentice killed him in his sleep.\"\\n―Sheev Palpatine\\nAccording to Sidious, Plagueis was powerful enough that he could use the Force to influence the midi-chlorians to create life and keep the ones he cared about from dying, a precious knowledge that awarded him the epithet of \"The Wise.\" However, Plagueis also developed a belief that the Force could \"strike back\" at him for his power. In truth, he became so powerful that the only thing he still dreaded was losing his power.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
656
+ " Document(page_content='Behind the scenes\\n Darth Plagueis as depicted in Star Wars Legends \\nDarth Plagueis was first mentioned on-screen in the 2005 film Star Wars: Episode III Revenge of the Sith, the third and final installment of the Star Wars prequel trilogy. Although Palpatine was never confirmed to be Plagueis\\' apprentice in the movie itself, a link to the official encyclopedia on StarWars.com did refer to Sidious as having been \"trained by Darth Plagueis.\"\\nThe character was created by George Lucas as early as the first draft of Revenge of the Sith—dated April 2003—and possibly earlier. His story was massively expanded upon in the 2012 Star Wars Legends novel Darth Plagueis, written by James Luceno. It notably established Plagueis to be a Muun just as Lucas proposed.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
657
+ " Document(page_content=\"―Sheev Palpatine\\nDarth Plagueis (pronounced /'pleɪɡ.əs/) was a Force-sensitive male Muun Dark Lord of the Sith and the Sith Master of Darth Sidious. Plagueis lusted for immortality, believing the secret laid in science. To that end, he worked with his Sith apprentice, conducting research into bioengineering and experimenting with his ability to influence the midi-chlorians to create life. In doing so, Plagueis acquired considerable knowledge of the Force, but he was ultimately betrayed and murdered by his own apprentice in accordance with the Rule of Two. Sidious later recounted the tale of his master's demise to lure the Jedi Knight Anakin Skywalker to the dark side of the Force, calling the fallen Sith Lord Darth Plagueis the Wise. After his own demise, Sidious used his master's teachings to cheat death, though true immortality still eluded him up until his final end.\", metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
658
+ " Document(page_content='Biography\\nEarly life\\nDarth Plagueis was a legendary Dark Lord of the Sith trained by the Sith Master Darth Tenebrous. At some point during his life, Plagueis acquired the protocol droid 11-4D. During his time as a Sith Lord and studying the Force, Plagueis acquired a vast amount of knowledge about the dark side and its teachings.\\n\\nMaster of Darth Sidious', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
659
+ " Document(page_content='\"He became so powerful, the only thing he was afraid of was…losing his power, which eventually of course he did.\"\\n―Sheev Palpatine, to Anakin Skywalker\\n Darth Plagueis was a wise Sith Lord who became paranoid as he grew in power. \\nDarth Plagueis was a wise Dark Lord of the Sith who possessed a vast knowledge of the dark side of the Force. He was a Muun male, and had orange eyes. He was a meticulous planner, who tried to accomplish the Sith\\'s goal to replace the Galactic Republic with a Sith Empire. Due to his powers, he also became paranoid, believing that the Force could \"strike back\".', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
660
+ " Document(page_content='\"Did you ever hear the Tragedy of Darth Plagueis the Wise?\"\\n―Sheev Palpatine, to Anakin Skywalker\\n Anakin Skywalker learned about the late Darth Plagueis as recounted by Sheev Palpatine. \\nIn the waning days of the Republic, Sidious recounted the history of his late master to Skywalker, whom he befriended years before the Clone Wars in an effort to gradually turn him to the dark side of the Force. Skywalker, who was unaware at the time that his friend was in fact a Sith Lord, and having developed premonitions of his secret wife\\'s death in childbirth, was intrigued by the story of Plagueis, particularly his ability to prevent death. When the fear of loss drove Skywalker to betray the Jedi Order, turning him into the Sith Lord Darth Vader, Sidious reformed the Republic into the Galactic Empire and declared himself Emperor. At the same time, he had the Jedi systematically executed throughout the galaxy by Order 66, completing the Sith plan that lasted for over a millennium.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
661
+ " Document(page_content='Although Legends works show that Plagueis had power over life and death, Lucas himself stated Palpatine\\'s tale about Plagueis\\' abilities was a lie. Canon also dispensed with the idea of these powers being a lie. The Biography gallery of Darth Sidious\\' Databank entry and the \"Senator Palpatine\" card part of 2020 Topps Star Wars Holocron Series both state that Plagueis was killed by Sidious after he took Darth Maul as his apprentice. However, the 2019 reference book Ultimate Star Wars, New Edition lists Plagueis\\' murder at Sidious\\' hands as taking place before Sidious takes Maul under his wing.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n",
662
+ " Document(page_content='\\nAt an opera show, Palpatine and Skywalker discussed the situation with Palpatine revealing to know that the Council wanted Skywalker to spy on him. At the same time, Skywalker, who was dealing with the fear of his wife\\'s loss was beginning to distrust the Jedi Council. This allowed Sidious to orchestrate Skywalker\\'s downfall; he claimed that the Jedi and the Sith were alike in nearly every way, setting the stage for the young Jedi to switch sides while playing to his devotion to the Jedi ideology. Next, Palpatine told him the story of his master, \"Darth Plagueis the Wise.\" According to the story, Plagueis possessed such strength in the Force that he could use it to prevent the ones closest to him from dying. Skywalker asked what happened to him, to which Palpatine said that he was afraid of losing his power, but had taught his apprentice everything, and then his apprentice killed him in his sleep. Skywalker, enthralled by the story, wished to know more about this power, asking him if he himself could learn it with Palpatine stating that, though possible, such things could not be taught by members of the Order. Skywalker, however, was unware that Palpatine was really talking about how he killed his mentor Plagueis, in order to lure him to the dark side.', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'})]"
663
+ ]
664
+ },
665
+ "execution_count": 20,
666
+ "metadata": {},
667
+ "output_type": "execute_result"
668
+ }
669
+ ],
670
  "source": [
671
  "# Functions for possible interactive Wookieepedia querying and storing in the db\n",
672
  "\n",
 
683
  "# first_wookieepedia_result('Darth Plagueis')\n",
684
  "\n",
685
  "\n",
686
+ "def get_wookieepedia_page_content(query: str, previous_sources: set[str]) -> Document | None:\n",
687
+ " '''Return cleaned content from a Wookieepedia page provided it was not already sourced\n",
688
  " '''\n",
689
  " url = first_wookieepedia_result(query)\n",
690
  "\n",
691
+ " if url in previous_sources: return None\n",
692
  " else:\n",
693
+ " response = requests.get(url)\n",
694
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
695
+ " doc = soup.find('div', id = 'content').get_text()\n",
 
 
696
  "\n",
697
+ " # Cleaning\n",
698
+ " doc = doc.split('\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n')[-1] # The (multiple) preambles are separated by these many newlines; no harm done if not present\n",
699
+ " doc = re.sub(r'\\[\\d*\\]', '', doc) # References (and section title's \"[]\" suffixes) are noise\n",
700
+ " doc = doc.split('\\nAppearances\\n')[0] # Keep only content before these sections\n",
701
+ " doc = doc.split('\\nSources\\n')[0] # Technically no need to check this if successfully cut on appearances, but no harm done\n",
702
+ " doc = re.sub(r'Contents\\n\\n(?:[\\d\\.]+ [^\\n]+\\n+)+', '', doc) # Remove table of contents\n",
703
  "\n",
704
+ " return Document(page_content = doc, metadata = dict(source = url))\n",
705
+ "\n",
706
+ "# print(get_wookieepedia_page_content('Darth Plagueis', set()))\n",
707
+ "# print(get_wookieepedia_page_content('Darth Plagueis', set(md.get('source') for md in woo_db.get()['metadatas'])))\n",
708
  "\n",
709
  "\n",
710
  "def get_wookieepedia_context(original_query: str, simple_query: str, wdb: Chroma) -> list[Document]:\n",
711
  " try:\n",
712
+ " doc = get_wookieepedia_page_content(simple_query, previous_sources = set(md.get('source') for md in wdb.get()['metadatas']))\n",
713
+ " if doc is not None:\n",
714
+ " new_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200).split_documents([doc])\n",
715
+ " wdb.add_documents(new_chunks)\n",
716
+ " print(f\"Added new chunks (for '{simple_query}' -> {doc.metadata['source']}) to the Wookieepedia database.\")\n",
717
  " except: return []\n",
718
  "\n",
719
  " return wdb.similarity_search(original_query, k = 10)\n",
 
731
  },
732
  {
733
  "cell_type": "code",
734
+ "execution_count": 21,
735
  "metadata": {},
736
  "outputs": [],
737
  "source": [
 
749
  },
750
  {
751
  "cell_type": "code",
752
+ "execution_count": 22,
753
  "metadata": {},
754
  "outputs": [],
755
  "source": [
 
782
  },
783
  {
784
  "cell_type": "code",
785
+ "execution_count": 23,
786
  "metadata": {},
787
  "outputs": [],
788
  "source": [
 
809
  },
810
  {
811
  "cell_type": "code",
812
+ "execution_count": 24,
813
  "metadata": {},
814
  "outputs": [],
815
  "source": [
 
835
  },
836
  {
837
  "cell_type": "code",
838
+ "execution_count": 25,
839
  "metadata": {},
840
  "outputs": [
841
  {
842
  "data": {
843
  "text/plain": [
844
  "{'input': \"Who participates in Han's rescue from Jabba? And where is the palace?\",\n",
845
+ " 'context': [Document(page_content=\"After the smuggler Han Solo failed to repay him for lost cargo, Jabba placed a high price on his head. Solo was eventually delivered to him by one of his bounty hunters, Boba Fett, as a gift from Darth Vader. However, this capture brought him to the attention of Jedi Knight Luke Skywalker, who sought to rescue his friend from Jabba's imprisonment. As he attempted to execute the Jedi and his allies in the Great Pit of Carkoon, Jabba was choked to death by Leia Organa. With the Hutts unable to decide who would inherit Jabba's criminal ventures, many of his slaves, including the Niktos, were free, and his palace was occupied by his former Majordomo Bib Fortuna, who took his place as Daimyo of Tatooine until Fett killed and usurped him. Fett sought to rebuild Jabba's criminal empire in his own image, intending to rule with respect rather than the fear that the Hutt instilled in his followers.\", metadata={'source': 'wookieepedia\\\\Jabba_Desilijic_Tiure.txt'}),\n",
846
+ " Document(page_content=\"Jabba sends Han Solo, Chewbacca, and Greedo to travel to Corellia to recover the ashes of archrival Krestrel D'Naran.\\nHan Solo meets Corbus Tyra, who claims to be his father, Ovan.\\nChewbacca rescues Han Solo from bounty hunter Krrsantan and they later conduct a heist on Coruscant. Although the urn they have been after does not contain the ashes, it contains the neural core of Ajax Sigma, which Han Solo and Chewbacca bury away.\\n8/7 BBY\\nJabba Desilijic Tiure sends Krrsantan to hunt down Obi-Wan Kenobi.\\n7 BBY\\nBodhi Rook enters the Terrabe Sector Service Academy to train to be a pilot.\\nThe mission to Horuz occurs.\\nUn'hee is born.\\nObi-Wan Kenobi rescues 12-year old Luke Skywalker and his friend Windy after they are left unconscious during a sandstorm.\\n6 BBY\\nThe mission to Inusagi occurs.[source?]\\nThe Malkhani Insurrections occurs.\\nThe Leonis family migrates to Lothal.\\nThe Westhills massacre occurs.\", metadata={'source': 'wookieepedia\\\\Timeline_of_galactic_history.txt'}),\n",
847
+ " Document(page_content=\"Djarin takes the Darksaber while Gideon is taken into New Republic custody, though he is broken out en route to trial.\\nAs Bo-Katan Kryze returned to her forces without the Darksaber, her Mandalorians abandon her and become privateers under Axe Woves.\\nThe Jedi Luke Skywalker beckons Grogu to train as a Jedi with him. Removing his Mandalorian helmet, Djarin gives his blessing to Grogu before letting the latter go.\\nLuke Skywalker builds a Jedi Temple on Ossus. After a brief time with Skywalker there, Grogu decides to return to Din Djarin. Jabba's Palace under new management \\nBoba Fett and Fennec Shand attack Jabba's Palace, replacing Bib Fortuna's place as master of the palace.\\nA gang war erupts on Tatooine, with Daimyo Boba Fett's Fett gotra defeating the Pyke Syndicate presence on the world and emerging as its leading criminal syndicate.\\nc. 9 ABY\", metadata={'source': 'wookieepedia\\\\Timeline_of_galactic_history.txt'}),\n",
848
+ " Document(page_content=\"At the chancellor's request, Windu dispatched Kenobi and Skywalker on a mission to save Jabba's son. \\nSoon afterward Windu received a message from the Advanced Recon Force Scout Troopers who tracked Rotta's location to an old monastery on the planet Teth. Due to the monastery's heavy fortification, Windu instructed the troopers to stand by and await Republic reinforcements. He then reassigned three of Admiral Wurtz's cruisers to the rescue operation. In spite of Count Dooku's plot to form an alliance between the Confederacy and the Hutt Clan, Rotta was ultimately redeemed from captivity by Anakin Skywalker and his new Padawan, Ahsoka Tano. The Separatists were defeated on Teth and Jabba was made aware of Dooku's conspiracy, which in turn allowed the Republic to secure passage through the Hyperspace lanes in Hutt territory.\\n\\nLiberation of Ryloth\", metadata={'source': 'wookieepedia\\\\Mace_Windu.txt'})],\n",
849
+ " 'answer': \"Han Solo is rescued from Jabba the Hutt by a group consisting of Jedi Knight Luke Skywalker, Princess Leia Organa (disguised as the bounty hunter Boushh), Chewbacca, Lando Calrissian, R2-D2, and C-3PO. The rescue takes place at Jabba's Palace on the desert planet of Tatooine.\"}"
850
  ]
851
  },
852
+ "execution_count": 25,
853
  "metadata": {},
854
  "output_type": "execute_result"
855
  }
 
864
  "))"
865
  ]
866
  },
867
+ {
868
+ "cell_type": "code",
869
+ "execution_count": 26,
870
+ "metadata": {},
871
+ "outputs": [],
872
+ "source": [
873
+ "simplify_query_prompt = ChatPromptTemplate.from_messages([\n",
874
+ " ('system', 'Given the above conversation, generate a search query to find a relevant page in the Star Wars fandom wiki; the query should be something simple, at most 4 words, such as the name of a character, place, event, item, etc.'),\n",
875
+ " MessagesPlaceholder('chat_history', optional = True), # Using this form since not clear how to have optional = True in the tuple form\n",
876
+ " ('human', '{query}')\n",
877
+ "])\n",
878
+ "\n",
879
+ "simplify_query_chain = simplify_query_prompt | llm | StrOutputParser() # To extract just the message\n",
880
+ "\n",
881
+ "# simplify_query_chain.invoke(dict(context = 'You are an expert of Star Wars lore', query = 'Do you know the tragedy of Darth Plagueis the Wise?'))"
882
+ ]
883
+ },
884
  {
885
  "cell_type": "markdown",
886
  "metadata": {},
 
890
  },
891
  {
892
  "cell_type": "code",
893
+ "execution_count": 27,
894
  "metadata": {},
895
  "outputs": [],
896
  "source": [
 
916
  },
917
  {
918
  "cell_type": "code",
919
+ "execution_count": 28,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  "metadata": {},
921
  "outputs": [
922
  {
 
1034
  "and supplies in all directions. Suddenly they stop. Then \n",
1035
  "everything is quiet for a few moments. A great howling moan \n",
1036
  "is heard echoing throughout the canyon which sends the \n",
1037
+ "Sandpeople fleeing in terror.\u001b[0m\u001b[32;1m\u001b[1;3mThe dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. The tragedy of Darth Plagueis the Wise is that he was eventually betrayed and killed by his own apprentice, who was seeking to obtain his power. This story plays a significant role in Anakin's fall to the dark side as he becomes intrigued by the idea of cheating death, which ultimately leads him to become Darth Vader.\u001b[0m\n",
1038
  "\n",
1039
  "\u001b[1m> Finished chain.\u001b[0m\n"
1040
  ]
 
1045
  "{'chat_history': [HumanMessage(content='Are you knowledgeable about Star Wars?'),\n",
1046
  " AIMessage(content='Very')],\n",
1047
  " 'input': 'Do you know the tragedy of Darth Plagueis the Wise?',\n",
1048
+ " 'output': 'The dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. The tragedy of Darth Plagueis the Wise is that he was eventually betrayed and killed by his own apprentice, who was seeking to obtain his power. This story plays a significant role in Anakin\\'s fall to the dark side as he becomes intrigued by the idea of cheating death, which ultimately leads him to become Darth Vader.'}"
1049
  ]
1050
  },
1051
+ "execution_count": 28,
1052
  "metadata": {},
1053
  "output_type": "execute_result"
1054
  }
serve.py CHANGED
@@ -43,7 +43,7 @@ dotenv.load_dotenv()
43
  ## Vector stores
44
 
45
  script_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = r'scripts\db')
46
- woo_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = r'wookieepedia_db')
47
 
48
 
49
 
@@ -124,7 +124,18 @@ woo_retriever_prompt = ChatPromptTemplate.from_messages([
124
  woo_retriever_chain = create_history_aware_retriever(llm, woo_db.as_retriever(), woo_retriever_prompt) # Essentially just: prompt | llm | StrOutputParser() | retriever
125
 
126
 
127
- full_chain = create_retrieval_chain(script_retriever_chain, document_chain)
 
 
 
 
 
 
 
 
 
 
 
128
 
129
 
130
 
@@ -136,12 +147,13 @@ script_tool = create_retriever_tool(
136
  '''Search the Star Wars film scripts. This tool should be the first choice for Star Wars related questions.
137
  Queries passed to this tool should be lists of keywords likely to be in dialogue or scene descriptions, and should not include film titles.'''
138
  )
139
- wookieepedia_tool = create_retriever_tool(
140
  woo_db.as_retriever(search_kwargs = dict(k = 4)),
141
  'search_wookieepedia',
142
- 'Search the Star Wars fandom wiki. This tool should be used for queries about details of a particular character, location, event, weapon, etc., and the query should be something simple, such as the name of a character, place, event, item, etc.',
 
143
  )
144
- tools = [script_tool, wookieepedia_tool]
145
 
146
  agent_system_text = '''
147
  You are a helpful agent who is very knowledgeable about Star Wars and your job is to answer questions about its plot, characters, etc.
@@ -159,6 +171,7 @@ agent_executor = AgentExecutor(agent = agent, tools = tools, verbose = True)
159
 
160
 
161
 
 
162
 
163
  class StrInput(BaseModel):
164
  input: str
@@ -167,7 +180,7 @@ class Input(BaseModel):
167
  input: str
168
  chat_history: list[BaseMessage] = Field(
169
  ...,
170
- extra = {'widget': {'type': 'chat', 'input': 'location'}},
171
  )
172
 
173
  class Output(BaseModel):
@@ -175,7 +188,6 @@ class Output(BaseModel):
175
 
176
 
177
 
178
-
179
  ## App definition
180
 
181
  app = FastAPI(
@@ -185,9 +197,6 @@ app = FastAPI(
185
  )
186
 
187
 
188
-
189
- ## Adding chain route
190
-
191
  # add_routes(app, script_db.as_retriever())
192
  add_routes(app, full_chain.with_types(input_type = StrInput, output_type = Output), playground_type = 'default')
193
 
@@ -197,6 +206,7 @@ add_routes(app, full_chain.with_types(input_type = StrInput, output_type = Outpu
197
  # add_routes(app, agent_executor.with_types(input_type = StrInput, output_type = Output))
198
 
199
 
 
200
  if __name__ == '__main__':
201
  import uvicorn
202
 
 
43
  ## Vector stores
44
 
45
  script_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = r'scripts\db')
46
+ woo_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = r'wookieepedia\db')
47
 
48
 
49
 
 
124
  woo_retriever_chain = create_history_aware_retriever(llm, woo_db.as_retriever(), woo_retriever_prompt) # Essentially just: prompt | llm | StrOutputParser() | retriever
125
 
126
 
127
+ # full_chain = create_retrieval_chain(script_retriever_chain, document_chain)
128
+ full_chain = create_retrieval_chain(woo_retriever_chain, document_chain)
129
+
130
+
131
+
132
+ simplify_query_prompt = ChatPromptTemplate.from_messages([
133
+ ('system', 'Given the above conversation, generate a search query to find a relevant page in the Star Wars fandom wiki; the query should be something simple, at most 4 words, such as the name of a character, place, event, item, etc.'),
134
+ MessagesPlaceholder('chat_history', optional = True), # Using this form since not clear how to have optional = True in the tuple form
135
+ ('human', '{query}')
136
+ ])
137
+
138
+ simplify_query_chain = simplify_query_prompt | llm | StrOutputParser() # To extract just the message
139
 
140
 
141
 
 
147
  '''Search the Star Wars film scripts. This tool should be the first choice for Star Wars related questions.
148
  Queries passed to this tool should be lists of keywords likely to be in dialogue or scene descriptions, and should not include film titles.'''
149
  )
150
+ woo_tool = create_retriever_tool(
151
  woo_db.as_retriever(search_kwargs = dict(k = 4)),
152
  'search_wookieepedia',
153
+ 'Search the Star Wars fandom wiki. This tool should be the first choice for Star Wars related questions.'
154
+ # This tool should be used for queries about details of a particular character, location, event, weapon, etc., and the query should be something simple, such as the name of a character, place, event, item, etc.'''
155
  )
156
+ tools = [script_tool, woo_tool]
157
 
158
  agent_system_text = '''
159
  You are a helpful agent who is very knowledgeable about Star Wars and your job is to answer questions about its plot, characters, etc.
 
171
 
172
 
173
 
174
+ ## Type specifications (with unusual class-scope fields)
175
 
176
  class StrInput(BaseModel):
177
  input: str
 
180
  input: str
181
  chat_history: list[BaseMessage] = Field(
182
  ...,
183
+ extra = dict(widget = dict(type = 'chat', input = 'location')),
184
  )
185
 
186
  class Output(BaseModel):
 
188
 
189
 
190
 
 
191
  ## App definition
192
 
193
  app = FastAPI(
 
197
  )
198
 
199
 
 
 
 
200
  # add_routes(app, script_db.as_retriever())
201
  add_routes(app, full_chain.with_types(input_type = StrInput, output_type = Output), playground_type = 'default')
202
 
 
206
  # add_routes(app, agent_executor.with_types(input_type = StrInput, output_type = Output))
207
 
208
 
209
+
210
  if __name__ == '__main__':
211
  import uvicorn
212
 
wookieepedia.ipynb CHANGED
The diff for this file is too large to render. See raw diff