{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Star Wars Expert" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_openai import ChatOpenAI#, OpenAIEmbeddings # No need to pay for using embeddings as well when have free alternatives\n", "\n", "# Data\n", "from langchain_community.document_loaders import DirectoryLoader, TextLoader, WebBaseLoader\n", "# from langchain_chroma import Chroma # The documentation uses this one, but it is extremely recent, and the same functionality is available in langchain_community and langchain (which imports community)\n", "from langchain_community.vectorstores import Chroma # This has documentation on-hover, while the indirect import through non-community does not\n", "from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings # The free alternative (also the default in docs, with model_name = 'all-MiniLM-L6-v2')\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter#, TextSplitter # Recursive to better keep related bits contiguous (also recommended in docs: https://python.langchain.com/docs/modules/data_connection/document_transformers/)\n", "\n", "# Chains\n", "from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain.chains.combine_documents import create_stuff_documents_chain\n", "from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n", "from langchain.tools.retriever import create_retriever_tool\n", "from langchain_core.runnables import RunnablePassthrough, RunnableParallel, chain\n", "from langchain_core.pydantic_v1 import BaseModel, Field\n", "\n", "# Agents\n", "from langchain import 
hub\n", "from langchain.agents import create_tool_calling_agent, AgentExecutor\n", "\n", "# To manually create inputs to test pipelines\n", "from langchain_core.messages import HumanMessage, AIMessage\n", "from langchain_core.documents import Document\n", "\n", "# # Custom retriever\n", "# from langchain_core.callbacks import CallbackManagerForRetrieverRun\n", "# from langchain_core.documents import Document\n", "# from langchain_core.retrievers import BaseRetriever\n", "\n", "import requests\n", "from bs4 import BeautifulSoup\n", "from tqdm.auto import tqdm\n", "\n", "import os\n", "import shutil\n", "from pathlib import Path\n", "import re\n", "\n", "import dotenv\n", "dotenv.load_dotenv()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data Loaders\n", "NOTE: when regeneration is enabled, running the cells below deletes the existing database before adding the data to a new one, since content is duplicated otherwise" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Film Scripts" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)\n", "The script database contains 1260 chunks, with mean length of 892 characters\n" ] } ], "source": [ "# Comparison of vector dbs: https://zackproser.com/blog/vector-databases-compared\n", "# Opinion: Milvus (more features, bigger community, higher performance(?), fully free, no enterprise plans) > Weaviate > Chroma\n", "# However Milvus and Weaviate both require a separate instance to be up and running\n", "# (The documentation uses FAISS, but it seems unnecessarily limited in comparison)\n", "# Hence Chroma - https://python.langchain.com/docs/integrations/vectorstores/chroma/\n", "\n", "# Separately, no need to pay for OpenAIEmbeddings; additionally, all-MiniLM-L6-v2 is default in docs\n", "\n", "REGENERATE_SCRIPT_DATABASE = 
False\n", "\n", "# Open the persisted store if one exists; if a rebuild was requested, delete it first so chunks are not duplicated\n", "if (db_exists := os.path.exists(db_dir := str(Path('scripts') / 'db'))):\n", "    if REGENERATE_SCRIPT_DATABASE:\n", "        print('Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)')\n", "        shutil.rmtree(db_dir)\n", "    else: script_db = Chroma(embedding_function = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n", "\n", "# Build the store from the raw scripts when none was found or the old one was just deleted\n", "if not db_exists or REGENERATE_SCRIPT_DATABASE: # Unfortunate disjoining of the two conditional blocks\n", "    scripts = DirectoryLoader('scripts', glob = '*.txt', loader_cls = TextLoader).load()\n", "    for s in scripts: s.page_content = re.sub(r'^[\\t ]+', '', s.page_content, flags = re.MULTILINE) # Strip leading tabs/spaces from every line (the spacing used to centre text is noise)\n", "\n", "    script_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, separators = ['\\n\\n\\n', '\\n\\n', '\\n']).split_documents(scripts)\n", "    # Why not some overlap for extra context just in case?\n", "    # Also, no need for fancier sentence or semantic splitting in this highly-formatted text\n", "\n", "    script_db = Chroma.from_documents(script_chunks, SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2'), persist_directory = db_dir)\n", "\n", "print(f'The script database contains {len(script_db)} chunks, with mean length of {sum(len(s) for s in script_db.get()[\"documents\"]) / len(script_db):.0f} characters')\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(page_content=\"LUKE\\nNo, my father didn't fight in the \\nwars. He was a navigator on a spice \\nfreighter.\\n\\nBEN\\nThat's what your uncle told you. He \\ndidn't hold with your father's ideals. 
\\nThought he should have stayed here \\nand not gotten involved.\\n\\nLUKE\\nYou fought in the Clone Wars?\\n\\nBEN\\nYes, I was once a Jedi Knight the \\nsame as your father.\\n\\nLUKE\\nI wish I'd known him.\\n\\nBEN\\nHe was the best star-pilot in the \\ngalaxy, and a cunning warrior. I \\nunderstand you've become quite a \\ngood pilot yourself. And he was a \\ngood friend. Which reminds me...\\n\\nBen gets up and goes to a chest where he rummages around.\\nAs Luke finishes repairing Threepio and starts to fit the \\nrestraining bolt back on, Threepio looks at him nervously.\\nLuke thinks about the bolt for a moment then puts it on the \\ntable. Ben shuffles up and presents Luke with a short handle \\nwith several electronic gadgets attached to it.\", metadata={'source': 'scripts\\\\Episode IV - A New Hope.txt'}),\n", " Document(page_content=\"LUKE (turning away, derisive)\\nA certain point of view!\\n\\nBEN\\nLuke, you're going to find that many of the truths we cling to depend \\ngreatly on our own\\npoint of view.\\n\\nLuke is unresponsive. Ben studies him in silence for a moment.\\n\\nBEN\\nI don't blame you for being angry. If I was wrong in what I did, it \\ncertainly wouldn't have been for the first time. You see, what happened \\nto your father was my fault.\\n\\nBen pauses sadly.\\n\\nBEN\\nAnakin was a good friend.\\n\\nLuke turns with interest at this. As Ben speaks, Luke settles on a \\nstump, mesmerized. Artoo comes over to offer his comforting presence.\\n\\nBEN\\nWhen I first knew him, your father was already a great pilot. But I was \\namazed how strongly the Force was with him. I took it upon myself to \\ntrain him as a Jedi. I thought that I could instruct him just as well \\nas Yoda. I was wrong. 
My pride has had terrible consequences for the \\ngalaxy.\\n\\nLuke is entranced.\\n\\nLUKE\\nThere's still good in him.\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"LUKE\\nNever-r-r!\\n\\nLuke ignites his lightsaber and screams in anger, rushing at hisfather \\nwith a frenzy we have not seen before. Sparks fly as Luke and Vader \\nfight in the cramped area. Luke's hatred forces Vader to retreat out of \\nthe low area and across a bridge overlooking a vast elevator shaft. \\nEach stroke of Luke's sword drives his father further toward defeat.\\n\\nThe Dark Lord is knocked to his knees, and as he raises his sword to \\nblock another onslaught, Luke slashes Vader's right hand off at the \\nwrist, causing metal and electronic parts to fly from the mechanical \\nstump. Vader's sword clatters uselessly away, over the\\nedge of the platform and into the bottomless shaft below. Luke moves \\nover Vader and holds the blade of his sword to the Dark Lord's throat. \\nThe Emperor watches with uncontrollable, pleased agitation.\\n\\nEMPEROR\\nGood! Your hate has made you powerful. Now, fulfill your destiny and \\ntake your father's\\nplace at my side!\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"Luke hasn't really been listening.\\n\\nLUKE\\nHow did my father die?\\n\\nBEN\\nA young Jedi named Darth Vader, who \\nwas a pupil of mine until he turned \\nto evil, helped the Empire hunt down \\nand destroy the Jedi Knights. He \\nbetrayed and murdered your father. \\nNow the Jedi are all but extinct. \\nVader was seduced by the dark side \\nof the Force.\\n\\nLUKE\\nThe Force?\\n\\nBEN\\nWell, the Force is what gives a Jedi \\nhis power. It's an energy field \\ncreated by all living things. It \\nsurrounds us and penetrates us. 
It \\nbinds the galaxy together.\\n\\nArtoo makes beeping sounds.\\n\\nBEN\\nNow, let's see if we can't figure \\nout what you are, my little friend. \\nAnd where you come from.\\n\\nLUKE\\nI saw part of the message he was...\\n\\nLuke is cut short as the recorded image of the beautiful \\nyoung Rebel princess is projected from Artoo's face.\\n\\nBEN\\nI seem to have found it.\\n\\nLuke stops his work as the lovely girl's image flickers before \\nhis eyes.\", metadata={'source': 'scripts\\\\Episode IV - A New Hope.txt'}),\n", " Document(page_content=\"He catches his breath. A shiver runs through the ancient green \\ncreature, and he dies. Luke stares at his dead master as he disappears \\nin front of his eyes.\\n\\n51EXT DAGOBAH SWAMP - X-WING \\n\\nLuke wanders back to where his ship is sitting. Artoo beeps a greeting, \\nbut is ignored by his depressed master. Luke kneels down, begins to \\nhelp Artoo with the ship, then stops and shakes his head dejectedly.\\n\\nLUKE\\nI can't do it, Artoo. I can't go on alone.\\n\\nBEN (OS)\\nYoda will always be with you.\\n\\nLuke looks up to see the shimmering image of BEN KENOBI.\\n\\nLUKE\\nObi-Wan! Why didn't you tell me?\\n\\nThe ghost of Ben Kenobi approaches him through the swamp.\\n\\nLUKE\\nYou told me Vader betrayed and murdered my father.\\n\\nBEN\\nYou father was seduced by the dark side of the Force. He ceased to be \\nAnakin Skywalker\\nand became Darth Vader. When that happened, the good man who was your \\nfather was destroyed. So what I have told you was true... 
from a \\ncertain point of view.\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"Luke glances at the instrument complex floating away.At that instant, \\nVader's sword comes down across Luke's right forearm, cutting off his \\nhand and sending his sword flying.In great pain, Luke squeezes his \\nforearm under his left armpit and moves back along the gantry to its \\nextreme end.Vader\\nfollows.The wind subsides.Luke holds on.There is nowhere else to \\ngo.\\n\\nVADER\\nThere is no escape.Don't make \\nme destroy you.You do not yet \\nrealize your importance.You \\nhave only begun to discover your \\npower.Join me and I will complete \\nyour training.With our combined \\nstrength, we can end this destructive \\nconflict and bring order to the \\ngalaxy.\\n\\nLUKE\\nI'll never join you!\\n\\nVADER\\nIf you only knew the power of the \\ndark side.Obi-Wan never told \\nyou what happened to your father.\\n\\nLUKE\\nHe told me enough!It was you \\nwho killed him.\\n\\nVADER\\nNo.I am your father.\\n\\nShocked, Luke looks at Vader in utter disbelief.\\n\\nLUKE\\nNo.No.That's not true!\\nThat's impossible!\", metadata={'source': 'scripts\\\\Episode V - The Empire Strikes Back.txt'}),\n", " Document(page_content=\"Out of the window and on the view screens, the Rebel fleet is being \\ndecimated in blinding explosions of light and debris. But in here there \\nis no sound of battle. The Emperor turns to Luke.\\n\\nEMPEROR\\nYour fleet has lost. And your friends on the Endor moon will not \\nsurvive. There is no escape, my young apprentice. The Alliance will \\ndie...as will your friends.\\n\\nLuke's eyes are full of rage. Vader watches him.\\n\\nEMPEROR\\nGood. I can feel your anger. I am defenseless.Take your weapon! \\nStrike me down with all your hatred, and your journey towards the dark \\nside will be complete.\\n\\nLuke can resist no longer. The lightsaber flies into his hand. 
He \\nignites it in an instant and swings at the Emperor. Vader's lightsaber \\nflashes into view, blocking Luke's blow before it can reach the \\nEmperor. The two blades spark at contact. Luke turns to fight his \\nfather.\\n\\n115EXT FOREST\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"VADER (a whisper)\\nLuke, help me take this mask off.\\n\\nLURE\\nBut you'll die.\\n\\nVADER\\nNothing can stop that now. Just for once... let me look on you with my \\nown eyes.\\n\\nSlowly, hesitantly, Luke removes the mask from his father's face. There \\nbeneath the scars is an elderly man. His eyes do not focus. But the \\ndying man smiles at the sight before him.\\n\\nANAKIN (very weak)\\nNow...go, my son. Leave me.\\n\\nLUKE\\nNo. You're coming with me. I can't leave you here. I've got to save \\nyou.\\n\\nANAKIN\\nYou already have, Luke. You were right about me. Tell your sister...you \\nwere right.\\n\\nLUKE\\nFather...I won't leave you.\\n\\nDarth Vader, Anakin Skywalker...Luke's father, dies.\\n\\nA huge explosion rocks the docking bay. Slowly, Luke rises and, half \\ncarrying, half dragging the body of his father, stumbles toward a \\nshuttle.\\n\\n131EXT DEATH STAR\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"LUKE (with sadness)\\nI found out Darth Vader was my father.\\n\\nBEN\\nTo be a Jedi, Luke, you must confront and then go beyond the dark side \\n- the side your father couldn't get past. Impatience is the easiest \\ndoor - for you, like your father. Only, your father was seduced by what \\nhe found on the other side of the door, and you have held firm. You're \\nno longer so reckless now, Luke. You are strong and patient. And now, \\nyou must face Darth Vader again!\\n\\nLUKE\\nI can't kill my own father.\\n\\nBEN\\nThen the Emperor has already won. 
You were our only hope.\\n\\nLUKE\\nYoda spoke of another.\\n\\nBEN\\nThe other he spoke of is your twin sister.\\n\\nLUKE\\nBut I have no sister.\\n\\nBEN\\nHmm. To protect you both from the Emperor, you were hidden from your \\nfather when you were born. The Emperor knew, as I did, if Anakin were \\nto have any offspring, they would be a threat to him. That is the \\nreason why your sister remains safely anonymous.\\n\\nLUKE\\nLeia! Leia's my sister.\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'}),\n", " Document(page_content=\"LUKE\\nLeia! Leia's my sister.\\n\\nBEN\\nYour insight serves you well. Bury your feelings deep down, Luke. They \\ndo you credit.\\nBut they could be made to serve the Emperor.\\n\\nLuke looks into the distance, trying to comprehend all this.\\n\\nBEN (continuing his narrative)\\nWhen your father left, he didn't know your mother was pregnant. Your \\nmother and I knew he would find out eventually, but we wanted to keep \\nyou both as safe as possible, for as long as possible.So I took you \\nto live with my brother Owen on Tatooine... 
and your mother took Leia \\nto live as the daughter of Senator Organa, on Alderaan.\\n\\nLuke turns, and settles near Ben to hear the tale.\", metadata={'source': 'scripts\\\\Episode VI - Return of the Jedi.txt'})]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Retrieval sanity check: ask the script store for the 10 chunks closest to a probe query\n", "\n", "res = script_db.similarity_search(query = 'Luke father reveal fight', k = 10)\n", "\n", "# Text-only view, if preferred: for r in res: print(r.page_content)\n", "res" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Wookieepedia Articles" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Wookieepedia database contains 10927 chunks, with mean length of 645 characters\n", "Current source pages in Wookieepedia db:\n", "395\n" ] }, { "data": { "text/plain": [ "{'https://starwars.fandom.com/wiki/Darth_Plagueis',\n", " 'https://starwars.fandom.com/wiki/Voss',\n", " 'wookieepedia\\\\1995_Topps_Star_Wars_Widevision.txt',\n", " 'wookieepedia\\\\1995_Topps_Star_Wars___The_Empire_Strikes_Back_Widevision.txt',\n", " 'wookieepedia\\\\1996_Topps_Star_Wars___Return_of_the_Jedi_Widevision.txt',\n", " 'wookieepedia\\\\2_systems_control_droid.txt',\n", " 'wookieepedia\\\\501st_Legion.txt',\n", " 'wookieepedia\\\\Aayla_Secura.txt',\n", " 'wookieepedia\\\\Abandoned_sarlacc_pit.txt',\n", " 'wookieepedia\\\\Acclamator-class_transgalactic_military_assault_ship.txt',\n", " 'wookieepedia\\\\Ackbar_(comic).txt',\n", " 'wookieepedia\\\\Age_of_Resistance_-_General_Hux_1.txt',\n", " 'wookieepedia\\\\Alderaan.txt',\n", " 'wookieepedia\\\\Alderaan_Cruiser.txt',\n", " 'wookieepedia\\\\Amee.txt',\n", " 'wookieepedia\\\\Amidalans.txt',\n", " 'wookieepedia\\\\Anakin_Skywalker.txt',\n", " 'wookieepedia\\\\Area_D-512.txt',\n", " 'wookieepedia\\\\Ask_Aak.txt',\n", " 'wookieepedia\\\\Assembly.txt',\n", " 'wookieepedia\\\\Asteroid.txt',\n", " 'wookieepedia\\\\Asteroid_Belt_Gas_Refinery.txt',\n", " 
'wookieepedia\\\\Attack_on_Fondor.txt',\n", " 'wookieepedia\\\\Avenger.txt',\n", " 'wookieepedia\\\\Bacara.txt',\n", " 'wookieepedia\\\\Bail.txt',\n", " 'wookieepedia\\\\Bail_Organa%27s_resistance_movement.txt',\n", " 'wookieepedia\\\\Bakur_Memorial_Building.txt',\n", " 'wookieepedia\\\\Bana_Breemu.txt',\n", " 'wookieepedia\\\\Barge_driver.txt',\n", " 'wookieepedia\\\\Bartender.txt',\n", " 'wookieepedia\\\\Battle_Stations_of_the_Empire_and_the_First_Order.txt',\n", " 'wookieepedia\\\\Battle_droid.txt',\n", " 'wookieepedia\\\\Battle_of_Naboo_(YJCCG).txt',\n", " 'wookieepedia\\\\Battle_of_Yavin.txt',\n", " 'wookieepedia\\\\Battlefield_Holographic_Control_Interface.txt',\n", " 'wookieepedia\\\\Ben.txt',\n", " 'wookieepedia\\\\Bespin.txt',\n", " 'wookieepedia\\\\Bespin_system.txt',\n", " 'wookieepedia\\\\Bib.txt',\n", " 'wookieepedia\\\\Bibble_UP.txt',\n", " 'wookieepedia\\\\Biggs_Darklighter.txt',\n", " 'wookieepedia\\\\Bly.txt',\n", " 'wookieepedia\\\\Boba_Fett.txt',\n", " 'wookieepedia\\\\Bodyguard.txt',\n", " 'wookieepedia\\\\Bottoms.txt',\n", " 'wookieepedia\\\\Boushh.txt',\n", " 'wookieepedia\\\\Bravo_Three.txt',\n", " 'wookieepedia\\\\Bravo_Two.txt',\n", " 'wookieepedia\\\\Bren_Derlin.txt',\n", " 'wookieepedia\\\\Bridge.txt',\n", " 'wookieepedia\\\\Bunker_Krill.txt',\n", " 'wookieepedia\\\\Camie_Marstrap.txt',\n", " 'wookieepedia\\\\Captain.txt',\n", " 'wookieepedia\\\\Captain_Antilles.txt',\n", " 'wookieepedia\\\\Cargo_of_Doom.txt',\n", " 'wookieepedia\\\\Category__Naboo_starfighter_classes.txt',\n", " 'wookieepedia\\\\Cato_Neimoidia.txt',\n", " 'wookieepedia\\\\Cato_Parasitti.txt',\n", " 'wookieepedia\\\\Chi_Eekway_Papanoida.txt',\n", " 'wookieepedia\\\\Chief.txt',\n", " 'wookieepedia\\\\Children.txt',\n", " 'wookieepedia\\\\Citadel.txt',\n", " 'wookieepedia\\\\Cloak_of_Darkness.txt',\n", " 'wookieepedia\\\\Clone_Captain.txt',\n", " 'wookieepedia\\\\Clone_Commander_Cody_Commander_Expansion.txt',\n", " 'wookieepedia\\\\Clone_Sergeant.txt',\n", " 
'wookieepedia\\\\Clone_trooper.txt',\n", " 'wookieepedia\\\\Cloud_City.txt',\n", " 'wookieepedia\\\\Cockpit.txt',\n", " 'wookieepedia\\\\Command_center.txt',\n", " 'wookieepedia\\\\Commander.txt',\n", " 'wookieepedia\\\\Commando_droid_captain.txt',\n", " 'wookieepedia\\\\Controller.txt',\n", " 'wookieepedia\\\\Cord.txt',\n", " 'wookieepedia\\\\Coruscant.txt',\n", " 'wookieepedia\\\\Count_Dooku_(article).txt',\n", " 'wookieepedia\\\\Creature_Safari.txt',\n", " 'wookieepedia\\\\Cruiser.txt',\n", " 'wookieepedia\\\\Dack.txt',\n", " 'wookieepedia\\\\Dagobah.txt',\n", " 'wookieepedia\\\\Darth_Sidious.txt',\n", " 'wookieepedia\\\\Databank_(website).txt',\n", " 'wookieepedia\\\\Davijaan.txt',\n", " 'wookieepedia\\\\Daye_Azur-Jamin.txt',\n", " 'wookieepedia\\\\Deadly_Reunion.txt',\n", " 'wookieepedia\\\\Deak.txt',\n", " 'wookieepedia\\\\Death.txt',\n", " 'wookieepedia\\\\Death_Star%27s_shield_generator.txt',\n", " 'wookieepedia\\\\Death_Star.txt',\n", " 'wookieepedia\\\\Death_Star_II.txt',\n", " 'wookieepedia\\\\Death_Star_II_Limited.txt',\n", " 'wookieepedia\\\\Deceived.txt',\n", " 'wookieepedia\\\\Deck_officer.txt',\n", " 'wookieepedia\\\\Detention_area.txt',\n", " 'wookieepedia\\\\Din_Grogu.txt',\n", " 'wookieepedia\\\\Dodonna.txt',\n", " 'wookieepedia\\\\Dofine_family.txt',\n", " 'wookieepedia\\\\Dooku.txt',\n", " 'wookieepedia\\\\Dorm%C3%A9.txt',\n", " 'wookieepedia\\\\Droid_guard_ship.txt',\n", " 'wookieepedia\\\\Dune_Sea.txt',\n", " 'wookieepedia\\\\Dungeon_ship.txt',\n", " 'wookieepedia\\\\Durge.txt',\n", " 'wookieepedia\\\\E%27ronoh.txt',\n", " 'wookieepedia\\\\Eastern_sand_dune.txt',\n", " 'wookieepedia\\\\Elan.txt',\n", " 'wookieepedia\\\\Elan_Sel%27Sabagno.txt',\n", " 'wookieepedia\\\\Emperor.txt',\n", " 'wookieepedia\\\\Endor.txt',\n", " 'wookieepedia\\\\Endor_Limited.txt',\n", " 'wookieepedia\\\\Enoch.txt',\n", " 'wookieepedia\\\\Entertainment_Center.txt',\n", " 'wookieepedia\\\\Episode_I_Adventures_13___Danger_on_Naboo.txt',\n", " 
'wookieepedia\\\\Escape_pod.txt',\n", " 'wookieepedia\\\\Ewok.txt',\n", " 'wookieepedia\\\\Ewok_Village.txt',\n", " 'wookieepedia\\\\Ewoks___The_Battle_for_Endor.txt',\n", " 'wookieepedia\\\\Fanta.txt',\n", " 'wookieepedia\\\\Felucia.txt',\n", " 'wookieepedia\\\\Fireball_(clone_pilot).txt',\n", " 'wookieepedia\\\\Firmus_Piett.txt',\n", " 'wookieepedia\\\\First_Trooper.txt',\n", " 'wookieepedia\\\\Fixer.txt',\n", " 'wookieepedia\\\\Force-sensitive_holding_cell.txt',\n", " 'wookieepedia\\\\Forest.txt',\n", " 'wookieepedia\\\\Four.txt',\n", " 'wookieepedia\\\\Galaxy.txt',\n", " 'wookieepedia\\\\Gantry_officer.txt',\n", " 'wookieepedia\\\\General_Grievous_1.txt',\n", " 'wookieepedia\\\\General_Madine%27s_Report_on_Operation_Cobolt.txt',\n", " 'wookieepedia\\\\Gold_Five.txt',\n", " 'wookieepedia\\\\Gold_Two_(rebellion).txt',\n", " 'wookieepedia\\\\Gonky_(Clone_Force_99).txt',\n", " 'wookieepedia\\\\Gray_Leader.txt',\n", " 'wookieepedia\\\\Great_Temple.txt',\n", " 'wookieepedia\\\\Gree_(clone_trooper).txt',\n", " 'wookieepedia\\\\Greedo.txt',\n", " 'wookieepedia\\\\Greef_Karga.txt',\n", " 'wookieepedia\\\\Gregar_Typho.txt',\n", " 'wookieepedia\\\\Guard.txt',\n", " 'wookieepedia\\\\Guardstar.txt',\n", " 'wookieepedia\\\\Gunship.txt',\n", " 'wookieepedia\\\\Han_(disambiguation).txt',\n", " 'wookieepedia\\\\Hangar.txt',\n", " 'wookieepedia\\\\Hermione.txt',\n", " 'wookieepedia\\\\Honest_Ellam%27s_Speeder_Lot.txt',\n", " 'wookieepedia\\\\Hoth.txt',\n", " 'wookieepedia\\\\Hull.txt',\n", " 'wookieepedia\\\\Human.txt',\n", " 'wookieepedia\\\\Hyperspace.txt',\n", " 'wookieepedia\\\\Imperial_I-class_Star_Destroyer.txt',\n", " 'wookieepedia\\\\Imperial_Sourcebook_(Second_Edition).txt',\n", " 'wookieepedia\\\\Imperial_Star_Destroyer_Cymoon_1_Refit.txt',\n", " 'wookieepedia\\\\Imperial_officer.txt',\n", " 'wookieepedia\\\\Imperial_shuttle.txt',\n", " 'wookieepedia\\\\Imperial_soldier.txt',\n", " 'wookieepedia\\\\Infiltration_(episode).txt',\n", " 
'wookieepedia\\\\Infiltration_of_Bezz_Drexx%27s_tower.txt',\n", " 'wookieepedia\\\\Into_the_Breach.txt',\n", " 'wookieepedia\\\\J-type_star_skiff.txt',\n", " 'wookieepedia\\\\Jabba_Desilijic_Tiure.txt',\n", " 'wookieepedia\\\\Jamillia.txt',\n", " 'wookieepedia\\\\Jango_Fett.txt',\n", " 'wookieepedia\\\\Janson_Hidreck.txt',\n", " 'wookieepedia\\\\Jar_Jar_Binks.txt',\n", " 'wookieepedia\\\\Jedi.txt',\n", " 'wookieepedia\\\\Jedi_Temple.txt',\n", " 'wookieepedia\\\\Jerjerrod_(admiral).txt',\n", " 'wookieepedia\\\\Jira.txt',\n", " 'wookieepedia\\\\Jocasta_Nu.txt',\n", " 'wookieepedia\\\\Joval_Rykk.txt',\n", " 'wookieepedia\\\\KE-UW33.txt',\n", " 'wookieepedia\\\\Karthon_Chop_Fields.txt',\n", " 'wookieepedia\\\\Kashyyyk.txt',\n", " 'wookieepedia\\\\Kea_Moll.txt',\n", " 'wookieepedia\\\\Ki-Adi-Mundi.txt',\n", " 'wookieepedia\\\\Kitster_Chanchani_Banai.txt',\n", " 'wookieepedia\\\\KnollVision.txt',\n", " 'wookieepedia\\\\Lama_Su.txt',\n", " 'wookieepedia\\\\Land_City.txt',\n", " 'wookieepedia\\\\Landing_Platform_Nine.txt',\n", " 'wookieepedia\\\\Landing_at_Point_Rain.txt',\n", " 'wookieepedia\\\\Landonis_Balthazar_Calrissian.txt',\n", " 'wookieepedia\\\\Lars_homestead.txt',\n", " 'wookieepedia\\\\Legends.txt',\n", " 'wookieepedia\\\\Leia_Skywalker_Organa_Solo.txt',\n", " 'wookieepedia\\\\Lieutenant.txt',\n", " 'wookieepedia\\\\Lott_Dod.txt',\n", " 'wookieepedia\\\\Luke_Skywalker.txt',\n", " 'wookieepedia\\\\Lure_of_the_Lost.txt',\n", " 'wookieepedia\\\\Mace.txt',\n", " 'wookieepedia\\\\Mace_Windu.txt',\n", " 'wookieepedia\\\\Main_Page.txt',\n", " 'wookieepedia\\\\Mas_Amedda.txt',\n", " 'wookieepedia\\\\Massassi.txt',\n", " 'wookieepedia\\\\Maximilian_Veers.txt',\n", " 'wookieepedia\\\\Maz_Kanata.txt',\n", " 'wookieepedia\\\\Medical_droid.txt',\n", " 'wookieepedia\\\\Millennium.txt',\n", " 'wookieepedia\\\\Millennium_Falcon.txt',\n", " 'wookieepedia\\\\Mon_Mothma.txt',\n", " 'wookieepedia\\\\More_than_a_Hobbie!.txt',\n", " 'wookieepedia\\\\Morgan_Elsbeth.txt',\n", " 
'wookieepedia\\\\Mos_Eisley.txt',\n", " 'wookieepedia\\\\Mos_Espa.txt',\n", " 'wookieepedia\\\\Mote%C3%A9.txt',\n", " 'wookieepedia\\\\Motti_family.txt',\n", " 'wookieepedia\\\\Mustafar.txt',\n", " 'wookieepedia\\\\Mygeeto.txt',\n", " 'wookieepedia\\\\Naboo.txt',\n", " 'wookieepedia\\\\Naboo_Fighter_Pilot.txt',\n", " 'wookieepedia\\\\Naboo_Palace_Guard.txt',\n", " 'wookieepedia\\\\Naboo_Royal_Handmaidens.txt',\n", " 'wookieepedia\\\\Naboo_swamp_moss.txt',\n", " 'wookieepedia\\\\Nas.txt',\n", " 'wookieepedia\\\\Navigator.txt',\n", " 'wookieepedia\\\\Needan.txt',\n", " 'wookieepedia\\\\New_Hope.txt',\n", " 'wookieepedia\\\\Nightclub.txt',\n", " 'wookieepedia\\\\Nute_Gunray.txt',\n", " 'wookieepedia\\\\Obi-Wan_Kenobi.txt',\n", " 'wookieepedia\\\\Officer.txt',\n", " 'wookieepedia\\\\Omega.txt',\n", " 'wookieepedia\\\\Oola.txt',\n", " 'wookieepedia\\\\Opening_crawl.txt',\n", " 'wookieepedia\\\\Operator.txt',\n", " 'wookieepedia\\\\Orn_Free_Taa.txt',\n", " 'wookieepedia\\\\Ossus_Academy_Main_Hangar.txt',\n", " 'wookieepedia\\\\Otoh_Gunga.txt',\n", " 'wookieepedia\\\\Ottilie.txt',\n", " 'wookieepedia\\\\Outer_Rim_Territories.txt',\n", " 'wookieepedia\\\\Owen.txt',\n", " 'wookieepedia\\\\Ozzel_family.txt',\n", " 'wookieepedia\\\\Padm%C3%A9_Amidala.txt',\n", " 'wookieepedia\\\\Palace.txt',\n", " 'wookieepedia\\\\Panaka.txt',\n", " 'wookieepedia\\\\Pelta-class_frigate.txt',\n", " 'wookieepedia\\\\Petranaki_arena.txt',\n", " 'wookieepedia\\\\Phoenix_Cell.txt',\n", " 'wookieepedia\\\\Phylanx_Redux_Transmitter.txt',\n", " 'wookieepedia\\\\Pilot%27s_End.txt',\n", " 'wookieepedia\\\\Pilot.txt',\n", " 'wookieepedia\\\\Plo_Koon.txt',\n", " 'wookieepedia\\\\Poggle_the_Lesser.txt',\n", " 'wookieepedia\\\\Polis_Massa.txt',\n", " 'wookieepedia\\\\Porkins_Belly_Run.txt',\n", " 'wookieepedia\\\\Porter_Engle.txt',\n", " 'wookieepedia\\\\Power_Station.txt',\n", " 'wookieepedia\\\\Quarsh_Panaka.txt',\n", " 'wookieepedia\\\\Qui-Gon_Jinn.txt',\n", " 'wookieepedia\\\\R3-T7.txt',\n", " 
'wookieepedia\\\\Rab%C3%A9_Tonsort.txt',\n", " 'wookieepedia\\\\Rancor_Pit.txt',\n", " 'wookieepedia\\\\Reactor_shaft.txt',\n", " 'wookieepedia\\\\Rebel_Alliance_Navy.txt',\n", " 'wookieepedia\\\\Rebel_Fighter_Squadrons_II_Expansion_Pack.txt',\n", " 'wookieepedia\\\\Rebel_base.txt',\n", " 'wookieepedia\\\\Rebel_officer_corps.txt',\n", " 'wookieepedia\\\\Rebel_pilot.txt',\n", " 'wookieepedia\\\\Red_Eleven.txt',\n", " 'wookieepedia\\\\Red_Leader.txt',\n", " 'wookieepedia\\\\Red_Nine.txt',\n", " 'wookieepedia\\\\Red_Seven.txt',\n", " 'wookieepedia\\\\Red_Ten.txt',\n", " 'wookieepedia\\\\Red_Three.txt',\n", " 'wookieepedia\\\\Red_Two.txt',\n", " 'wookieepedia\\\\Remember_Alderaan_(Star_Cruiser).txt',\n", " 'wookieepedia\\\\Republic_(cruiser).txt',\n", " 'wookieepedia\\\\Republic_battlecruiser.txt',\n", " 'wookieepedia\\\\Return.txt',\n", " 'wookieepedia\\\\Ric_Oli%C3%A9.txt',\n", " 'wookieepedia\\\\Ridge.txt',\n", " 'wookieepedia\\\\Rieekan_(SIS).txt',\n", " 'wookieepedia\\\\Rorworr_(Naboo).txt',\n", " 'wookieepedia\\\\Royal_Guard_(Onderon).txt',\n", " 'wookieepedia\\\\Rune.txt',\n", " 'wookieepedia\\\\Rune_Haako.txt',\n", " 'wookieepedia\\\\Ruwee_Naberrie.txt',\n", " 'wookieepedia\\\\Sab%C3%A9.txt',\n", " 'wookieepedia\\\\Sail_barge.txt',\n", " 'wookieepedia\\\\Saleucami.txt',\n", " 'wookieepedia\\\\Sandcrawler.txt',\n", " 'wookieepedia\\\\Scout.txt',\n", " 'wookieepedia\\\\Sebulba.txt',\n", " 'wookieepedia\\\\Second_officer.txt',\n", " 'wookieepedia\\\\Seek.txt',\n", " 'wookieepedia\\\\Senate.txt',\n", " 'wookieepedia\\\\Senate_Building.txt',\n", " 'wookieepedia\\\\Senate_chamber.txt',\n", " 'wookieepedia\\\\Senior_Controller.txt',\n", " 'wookieepedia\\\\Shmi_Skywalker_Lars.txt',\n", " 'wookieepedia\\\\Sio_Bibble.txt',\n", " 'wookieepedia\\\\Siward_Cass.txt',\n", " 'wookieepedia\\\\Skiff.txt',\n", " 'wookieepedia\\\\Slave_Quarters_Row.txt',\n", " 'wookieepedia\\\\Snowspeeder.txt',\n", " 'wookieepedia\\\\Sola.txt',\n", " 'wookieepedia\\\\Space_station.txt',\n", " 
'wookieepedia\\\\Spaceport.txt',\n", " 'wookieepedia\\\\Star.txt',\n", " 'wookieepedia\\\\Star_Cruiser.txt',\n", " 'wookieepedia\\\\Star_Destroyer.txt',\n", " 'wookieepedia\\\\Star_Wars_Outlaws.txt',\n", " 'wookieepedia\\\\Star_Wars_Rebels___Steps_Into_Shadow.txt',\n", " 'wookieepedia\\\\Star_Wars_Scrapbook.txt',\n", " 'wookieepedia\\\\Star_Wars___Complete_Locations_(2005).txt',\n", " 'wookieepedia\\\\Star_Wars___Complete_Locations_(2016).txt',\n", " 'wookieepedia\\\\Star_Wars___Complete_Vehicles_(2013).txt',\n", " 'wookieepedia\\\\Star_Wars___Dark_Empire.txt',\n", " 'wookieepedia\\\\Star_Wars___Episode_II_Attack_of_the_Clones_(unabridged_audiobook).txt',\n", " 'wookieepedia\\\\Star_Wars___Galactic_Battlegrounds.txt',\n", " 'wookieepedia\\\\Star_Wars___Knights_of_the_Old_Republic.txt',\n", " 'wookieepedia\\\\Star_Wars___Legacy_Volume_II_Book_3%E2%80%94Wanted___Ania_Solo.txt',\n", " 'wookieepedia\\\\Star_Wars___Return_of_the_Jedi_Coloring_Book_(Luke_Skywalker).txt',\n", " 'wookieepedia\\\\Star_Wars___The_Bad_Batch.txt',\n", " 'wookieepedia\\\\Star_Wars___The_Black_Series.txt',\n", " 'wookieepedia\\\\Star_Wars___The_Old_Republic___Onslaught.txt',\n", " 'wookieepedia\\\\Stardestroyer.txt',\n", " 'wookieepedia\\\\Starfield_Road.txt',\n", " 'wookieepedia\\\\Stormtrooper.txt',\n", " 'wookieepedia\\\\Super_Star_Destroyer.txt',\n", " 'wookieepedia\\\\Super_battle_droid_rocket_trooper.txt',\n", " 'wookieepedia\\\\Surface_Marshal.txt',\n", " 'wookieepedia\\\\TIE_fighter.txt',\n", " 'wookieepedia\\\\TIE_fighter_series.txt',\n", " 'wookieepedia\\\\TK-421.txt',\n", " 'wookieepedia\\\\Tagge_protoblade.txt',\n", " 'wookieepedia\\\\Takobo_Spaceport_Depot.txt',\n", " 'wookieepedia\\\\Tarkin_family.txt',\n", " 'wookieepedia\\\\Tatooine.txt',\n", " 'wookieepedia\\\\Taun_We.txt',\n", " 'wookieepedia\\\\Technician.txt',\n", " 'wookieepedia\\\\Terr_Taneel.txt',\n", " 'wookieepedia\\\\Tey_How.txt',\n", " 'wookieepedia\\\\The_Amazing_Book_of_LEGO_Star_Wars.txt',\n", " 
'wookieepedia\\\\The_Bad_Batch_Season_3.txt',\n", " 'wookieepedia\\\\The_DarkStryder_Campaign.txt',\n", " 'wookieepedia\\\\The_Force_Theme.txt',\n", " 'wookieepedia\\\\The_Legends_of_Luke_Skywalker.txt',\n", " 'wookieepedia\\\\The_Phantom_Apprentice.txt',\n", " 'wookieepedia\\\\The_Second_Kessel_Run.txt',\n", " 'wookieepedia\\\\The_Thrawn_Trilogy_Sourcebook.txt',\n", " 'wookieepedia\\\\Theda.txt',\n", " 'wookieepedia\\\\Theed.txt',\n", " 'wookieepedia\\\\There_Is_Always_Another.txt',\n", " 'wookieepedia\\\\Thire.txt',\n", " 'wookieepedia\\\\Threepio_Takes_Flight.txt',\n", " 'wookieepedia\\\\Throne_room_gown.txt',\n", " 'wookieepedia\\\\Tigran_Jamiro.txt',\n", " 'wookieepedia\\\\Timeline_of_galactic_history.txt',\n", " 'wookieepedia\\\\Toos.txt',\n", " 'wookieepedia\\\\Touchstone.txt',\n", " 'wookieepedia\\\\Tracking_officer.txt',\n", " 'wookieepedia\\\\Trade_Federation_Battleship_(aquatic).txt',\n", " 'wookieepedia\\\\Trade_Federation_Cruiser_(Second_Battle_of_Lok).txt',\n", " 'wookieepedia\\\\Trench_Run_Defense.txt',\n", " 'wookieepedia\\\\Triton_Squad%27s_second_commander.txt',\n", " 'wookieepedia\\\\Trooper.txt',\n", " 'wookieepedia\\\\Turret_Room.txt',\n", " 'wookieepedia\\\\Unidentified_Astro-Traffic_Control_Officer.txt',\n", " 'wookieepedia\\\\Unidentified_B1_battle_droid_captain.txt',\n", " 'wookieepedia\\\\Unidentified_Imperial_controller_(female).txt',\n", " 'wookieepedia\\\\Unidentified_Naboo_lake.txt',\n", " 'wookieepedia\\\\Unidentified_Rebel_captain.txt',\n", " 'wookieepedia\\\\Unidentified_announcer.txt',\n", " 'wookieepedia\\\\Unidentified_chief_pilot_(Night_Caller).txt',\n", " 'wookieepedia\\\\Unidentified_clone_trooper_(Second_Battle_of_Geonosis).txt',\n", " 'wookieepedia\\\\Unidentified_control_officer.txt',\n", " 'wookieepedia\\\\Unidentified_elevator_(D-Sector).txt',\n", " 'wookieepedia\\\\Unidentified_first_officer_(Wennis).txt',\n", " 'wookieepedia\\\\Unidentified_server_droid.txt',\n", " 'wookieepedia\\\\Unidentified_wingman.txt',\n", " 
# Build (or load) the persistent Chroma database holding the locally saved Wookieepedia pages.
# Set the flag below to True to wipe the persisted database and rebuild it from the .txt files;
# rebuilding without wiping would duplicate every chunk already stored.
REGENERATE_WOOKIEEPEDIA_DATABASE = False

db_dir = str(Path('wookieepedia') / 'db')
db_exists = os.path.exists(db_dir)
woo_embeddings = SentenceTransformerEmbeddings(model_name = 'all-MiniLM-L6-v2') # Built once and shared by the load and rebuild paths

if db_exists and not REGENERATE_WOOKIEEPEDIA_DATABASE:
    # Reuse the persisted database as-is
    woo_db = Chroma(embedding_function = woo_embeddings, persist_directory = db_dir)
else:
    if db_exists:
        print('Deleting the previous database and creating a new one (because otherwise content is duplicated in the db every time this block is run)')
        shutil.rmtree(db_dir)

    pages = DirectoryLoader('wookieepedia', glob = '*.txt', loader_cls = TextLoader).load()

    page_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, separators = ['\n\n\n', '\n\n', '\n']).split_documents(pages)
    # Why not some overlap for extra context just in case?
    # Also, no need for fancier sentence or semantic splitting in this highly-formatted text

    woo_db = Chroma.from_documents(page_chunks, woo_embeddings, persist_directory = db_dir)

# Fetch the collection once and reuse it for every statistic below (each .get() call returns the whole collection)
woo_db_contents = woo_db.get()
woo_chunks = woo_db_contents['documents']
mean_chunk_length = sum(len(s) for s in woo_chunks) / len(woo_chunks) if woo_chunks else 0 # Guard: an empty database would otherwise divide by zero
print(f'The Wookieepedia database contains {len(woo_chunks)} chunks, with mean length of {mean_chunk_length:.0f} characters')

print('Current source pages in Wookieepedia db:')
source_pages = set(md.get('source') for md in woo_db_contents['metadatas'])
print(len(source_pages))
source_pages
\\nSeeing that Skywalker was vulnerable in his weakened state, Vader offered to show Skywalker the power of the dark side and complete his training, but Skywalker adamantly refused to ever join Vader, the man who had killed his father. In that moment, Vader revealed the truth: that he was his father.', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n", " Document(page_content='\"Your thoughts betray you, Father. I feel the good in you, the conflict.\"\"There is no conflict.\"\"You couldn\\'t bring yourself to kill me before, and I don\\'t believe you\\'ll destroy me now.\"\"You underestimate the power of the dark side. If you will not fight, then you will meet your destiny!\"\\n―Luke Skywalker and Darth Vader\\n Father and son fight as the Emperor watches.', metadata={'source': 'wookieepedia\\\\Anakin_Skywalker.txt'}),\n", " Document(page_content=\"Sidious manipulated Skywalker into a battle with his father, seeking to replace Vader with a younger apprentice. \\nLuke was then manipulated by the Emperor, who told him of his friends' impending doom and Luke was told that he would soon become his apprentice. Luke at first resisted, but as the Emperor continued to taunt him, Luke grew angrier and then grabbed his lightsaber to strike the old man down. Darth Vader intercepted Luke's strike and the two began to engage in a duel much to the Emperor's delight.\\nLuke was first able to calm himself, refusing several times to continue fighting his father. Luke attempted to hide from Vader as their fight became more intense but Vader was able to sense his son's thoughts and learned that Luke had a sister. With this knowledge, Vader told Luke that if he did not turn to the dark side, perhaps Leia would. 
The threat of losing his sister enraged Luke and drove him to angrily attack Vader.\", metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n", " Document(page_content='Luke battles the Knights of Ren', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n", " Document(page_content='\"Luke, you can destroy the Emperor. He has foreseen this. It is your destiny. Join me, and together, we can rule the galaxy as father and son!\"\\n―Darth Vader, to Luke Skywalker\\nDarth Vader eventually lured Luke Skywalker to Cloud City on Bespin, where the Sith Lord attempted to capture Skywalker in carbonite and transport the Jedi-aspirant to the Emperor. However, Skywalker evaded capture and was confronted by Vader in a duel on Cloud City. There, with the Jedi-aspirant beaten by a precarious ledge, Vader divulged knowledge of his relationship with Skywalker, his son, and gave an ultimatum for the youth: to join forces—as said to be destiny—with his father, Darth Vader, or die. However, Luke Skywalker rejected his newfound father, and allowed himself to fall into the abyss.', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n", " Document(page_content='Luke Skywalker after the Battle of Yavin', metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n", " Document(page_content=\"\\nThe son of Jedi Knight Anakin Skywalker and Senator Padmé Amidala, Luke Skywalker was born along with his twin sister, Leia, in 19 BBY. As a result of Amidala's death and Anakin's fall to the dark side of the Force, the Skywalker children were separated and sent into hiding, with Leia adopted by the royal family of Alderaan while Luke was raised by his relatives on Tatooine. Longing for a life of adventure and purpose, Skywalker joined the Rebellion and began learning the ways of the Force under the guidance of Jedi Master Obi-Wan Kenobi, whose first apprentice was Luke's own father. 
During the Battle of Yavin in 0 BBY, Skywalker saved the Alliance from annihilation by destroying the Empire's planet-killing superweapon, the Death Star. He continued his training in the years that followed, determined to become a Jedi Knight like his father before him, and found a new mentor in Grand Master Yoda. After his master's death, Skywalker participated in the Battle of Endor in 4 ABY, during which he confronted the Sith Lord Darth Vader, whom he learned was in fact his father, Anakin Skywalker. With Luke's help, Anakin returned to the light side of the Force by killing the Emperor and Dark Lord of the Sith Darth Sidious at the cost of his own life, fulfilling his destiny as the Chosen One. Following the Battle of Endor, Skywalker trained his sister as a first Jedi apprentice on Ajan Kloss, but Leia ended her training after the birth of her son and seeing his death as a result of finishing her knighthood.\", metadata={'source': 'wookieepedia\\\\Luke_Skywalker.txt'}),\n", " Document(page_content='Meeting Luke Skywalker', metadata={'source': 'wookieepedia\\\\Darth_Sidious.txt'}),\n", " Document(page_content=\"After Luke cut off Vader's mechanical hand, Sidious betrayed Vader by instructing Luke to take Vader's place by his side, much like how he betrayed Dooku and had him replaced with Vader. However, Luke began to see what had happened and that he was dangerously close to becoming like his father was now; a slave to the dark side of the Force. Luke threw away his lightsaber and turned to face the Emperor. 
# Query testing: sanity-check what the Wookieepedia store returns for a keyword-style query

res = woo_db.similarity_search(
    query = 'Luke father reveal fight',
    k = 10,
)

# To read only the retrieved text instead of the full Document reprs:
# for r in res: print(r.page_content)
res
In truth, he became so powerful that the only thing he still dreaded was losing his power.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='Behind the scenes\\n Darth Plagueis as depicted in Star Wars Legends \\nDarth Plagueis was first mentioned on-screen in the 2005 film Star Wars: Episode III Revenge of the Sith, the third and final installment of the Star Wars prequel trilogy. Although Palpatine was never confirmed to be Plagueis\\' apprentice in the movie itself, a link to the official encyclopedia on StarWars.com did refer to Sidious as having been \"trained by Darth Plagueis.\"\\nThe character was created by George Lucas as early as the first draft of Revenge of the Sith—dated April 2003—and possibly earlier. His story was massively expanded upon in the 2012 Star Wars Legends novel Darth Plagueis, written by James Luceno. It notably established Plagueis to be a Muun just as Lucas proposed.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content=\"―Sheev Palpatine\\nDarth Plagueis (pronounced /'pleɪɡ.əs/) was a Force-sensitive male Muun Dark Lord of the Sith and the Sith Master of Darth Sidious. Plagueis lusted for immortality, believing the secret laid in science. To that end, he worked with his Sith apprentice, conducting research into bioengineering and experimenting with his ability to influence the midi-chlorians to create life. In doing so, Plagueis acquired considerable knowledge of the Force, but he was ultimately betrayed and murdered by his own apprentice in accordance with the Rule of Two. Sidious later recounted the tale of his master's demise to lure the Jedi Knight Anakin Skywalker to the dark side of the Force, calling the fallen Sith Lord Darth Plagueis the Wise. 
After his own demise, Sidious used his master's teachings to cheat death, though true immortality still eluded him up until his final end.\", metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='Biography\\nEarly life\\nDarth Plagueis was a legendary Dark Lord of the Sith trained by the Sith Master Darth Tenebrous. At some point during his life, Plagueis acquired the protocol droid 11-4D. During his time as a Sith Lord and studying the Force, Plagueis acquired a vast amount of knowledge about the dark side and its teachings.\\n\\nMaster of Darth Sidious', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='\"He became so powerful, the only thing he was afraid of was…losing his power, which eventually of course he did.\"\\n―Sheev Palpatine, to Anakin Skywalker\\n Darth Plagueis was a wise Sith Lord who became paranoid as he grew in power. \\nDarth Plagueis was a wise Dark Lord of the Sith who possessed a vast knowledge of the dark side of the Force. He was a Muun male, and had orange eyes. He was a meticulous planner, who tried to accomplish the Sith\\'s goal to replace the Galactic Republic with a Sith Empire. Due to his powers, he also became paranoid, believing that the Force could \"strike back\".', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='\"Did you ever hear the Tragedy of Darth Plagueis the Wise?\"\\n―Sheev Palpatine, to Anakin Skywalker\\n Anakin Skywalker learned about the late Darth Plagueis as recounted by Sheev Palpatine. \\nIn the waning days of the Republic, Sidious recounted the history of his late master to Skywalker, whom he befriended years before the Clone Wars in an effort to gradually turn him to the dark side of the Force. 
Skywalker, who was unaware at the time that his friend was in fact a Sith Lord, and having developed premonitions of his secret wife\\'s death in childbirth, was intrigued by the story of Plagueis, particularly his ability to prevent death. When the fear of loss drove Skywalker to betray the Jedi Order, turning him into the Sith Lord Darth Vader, Sidious reformed the Republic into the Galactic Empire and declared himself Emperor. At the same time, he had the Jedi systematically executed throughout the galaxy by Order 66, completing the Sith plan that lasted for over a millennium.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='Although Legends works show that Plagueis had power over life and death, Lucas himself stated Palpatine\\'s tale about Plagueis\\' abilities was a lie. Canon also dispensed with the idea of these powers being a lie. The Biography gallery of Darth Sidious\\' Databank entry and the \"Senator Palpatine\" card part of 2020 Topps Star Wars Holocron Series both state that Plagueis was killed by Sidious after he took Darth Maul as his apprentice. However, the 2019 reference book Ultimate Star Wars, New Edition lists Plagueis\\' murder at Sidious\\' hands as taking place before Sidious takes Maul under his wing.', metadata={'source': 'https://starwars.fandom.com/wiki/Darth_Plagueis'}),\n", " Document(page_content='\\nAt an opera show, Palpatine and Skywalker discussed the situation with Palpatine revealing to know that the Council wanted Skywalker to spy on him. At the same time, Skywalker, who was dealing with the fear of his wife\\'s loss was beginning to distrust the Jedi Council. This allowed Sidious to orchestrate Skywalker\\'s downfall; he claimed that the Jedi and the Sith were alike in nearly every way, setting the stage for the young Jedi to switch sides while playing to his devotion to the Jedi ideology. 
# Functions for possible interactive Wookieepedia querying and storing in the db

WOOKIEEPEDIA_SEARCH_URL = 'https://starwars.fandom.com/wiki/Special:Search'
WOOKIEEPEDIA_REQUEST_TIMEOUT = 10 # Seconds; without a timeout a stalled connection would hang the cell indefinitely


def first_wookieepedia_result(query: str) -> str:
    '''Get the url of the first result when searching Wookieepedia for a query
    (best for simple names as queries, ideally generated by the llm for something like
    "Produce a input consisting of the name of the most important element in the query so that its article can be looked up")

    Raises:
        requests.HTTPError: if the search page responds with an error status
        LookupError: if the search page contains no result link
    '''
    # Passing the query through `params` url-encodes it (spaces become "+", as before, and characters such as "&" or "#" no longer break the url)
    search_results = requests.get(WOOKIEEPEDIA_SEARCH_URL, params = dict(query = query), timeout = WOOKIEEPEDIA_REQUEST_TIMEOUT)
    search_results.raise_for_status()

    soup = BeautifulSoup(search_results.content, 'html.parser')
    first_res = soup.find('a', class_ = 'unified-search__result__link')
    if first_res is None:
        raise LookupError(f"No Wookieepedia search results for '{query}'")
    return first_res['href']

# first_wookieepedia_result('Darth Plagueis')


def _clean_wookieepedia_text(text: str) -> str:
    '''Strip preambles, reference markers, the table of contents and the trailing Appearances/Sources sections from raw page text'''
    text = text.split('\n' * 14)[-1] # The (multiple) preambles are separated by these many newlines; no harm done if not present
    text = re.sub(r'\[\d*\]', '', text) # References (and section title's "[]" suffixes) are noise
    text = text.split('\nAppearances\n')[0] # Keep only content before these sections
    text = text.split('\nSources\n')[0] # Technically no need to check this if successfully cut on appearances, but no harm done
    text = re.sub(r'Contents\n\n(?:[\d\.]+ [^\n]+\n+)+', '', text) # Remove table of contents
    return text


def get_wookieepedia_page_content(query: str, previous_sources: set[str]) -> Document | None:
    '''Return cleaned content from a Wookieepedia page provided it was not already sourced

    Args:
        query: search terms passed to `first_wookieepedia_result` to locate the page
        previous_sources: urls already stored; if the page found is one of them nothing is downloaded

    Returns:
        A Document whose metadata holds the page url under 'source', or None if the url is in `previous_sources`

    Raises:
        requests.HTTPError: if the page (or the search) responds with an error status
        LookupError: if the search has no results or the page has no 'content' div
    '''
    url = first_wookieepedia_result(query)

    # NOTE(review): only urls are compared here, so a page that was loaded from a local .txt file is not recognised as already sourced
    if url in previous_sources: return None

    response = requests.get(url, timeout = WOOKIEEPEDIA_REQUEST_TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    content = soup.find('div', id = 'content')
    if content is None:
        raise LookupError(f"No 'content' section found in {url}")

    return Document(page_content = _clean_wookieepedia_text(content.get_text()), metadata = dict(source = url))

# print(get_wookieepedia_page_content('Darth Plagueis', set()))
# print(get_wookieepedia_page_content('Darth Plagueis', set(md.get('source') for md in woo_db.get()['metadatas'])))


def get_wookieepedia_context(original_query: str, simple_query: str, wdb: Chroma) -> list[Document]:
    '''Retrieve context for a question, first adding the most relevant Wookieepedia page to the database if it is new

    Args:
        original_query: the full question, used for the similarity search
        simple_query: short search terms (e.g. a name) used to find the Wookieepedia page
        wdb: the vector database to extend and search

    Returns:
        The 10 chunks most similar to `original_query`. If looking up or storing the page fails,
        the failure is reported and the search still runs against what the database already holds.
    '''
    try:
        # Only metadata is needed to know which sources are already stored, so skip loading every chunk's text
        known_sources = set(md.get('source') for md in wdb.get(include = ['metadatas'])['metadatas'])
        doc = get_wookieepedia_page_content(simple_query, previous_sources = known_sources)
        if doc is not None:
            new_chunks = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200).split_documents([doc])
            wdb.add_documents(new_chunks)
            # Document exposes metadata as an attribute; subscripting it (doc['metadata']) raised a TypeError
            # that was silently swallowed, so an empty list was returned precisely when new content had been added
            print(f"Added new chunks (for '{simple_query}' -> {doc.metadata['source']}) to the Wookieepedia database.")
    except Exception as err:
        # Best effort: a failed web lookup should not prevent answering from the existing database
        print(f"Could not add a Wookieepedia page for '{simple_query}' ({err!r}); searching the existing database only.")

    return wdb.similarity_search(original_query, k = 10)

# get_wookieepedia_context('Do you know the Tragedy of Darth Plagueis the Wise?', 'Darth Plagueis', woo_db)
# Answering prompt: the system text (which embeds the retrieved {context}), any previous turns, then the new question.
# `document_prompt_system_text` and `llm` are defined earlier in the notebook.
document_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(document_prompt_system_text),
    MessagesPlaceholder(variable_name = 'chat_history', optional = True),
    HumanMessagePromptTemplate.from_template('{input}'),
])

document_chain = create_stuff_documents_chain(llm = llm, prompt = document_prompt)

# Manual checks:
# document_prompt.format_messages(context = 'You are an expert in Star Wars lore', input = 'Are you knowledgeable about Star Wars?')
# document_chain.invoke(dict(context = [Document(page_content = 'You are an expert in Star Wars lore')], input = 'Are you knowledgeable about Star Wars?'))

# Alternative without document stuffing, extracting just the message:
# basic_chain = document_prompt | llm | StrOutputParser()
# basic_chain.invoke(dict(context = 'You are an expert of Star Wars lore', input = 'Are you knowledgeable about Star Wars?'))


def _history_aware_query_prompt(instruction: str) -> ChatPromptTemplate:
    '''Build the prompt shared by both retriever chains: the chat history, the user's input, then the query-writing instruction'''
    return ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name = 'chat_history'),
        HumanMessagePromptTemplate.from_template('{input}'),
        HumanMessagePromptTemplate.from_template(instruction),
    ])


# Film scripts: the llm rewrites the conversation into keywords, which are then looked up in the script database
script_retriever_prompt = _history_aware_query_prompt('''Given the above conversation, generate a search query to look up relevant information in a database containing the full scripts from the Star Wars films (i.e. just dialogue and brief scene descriptions).
    The query need not be a proper sentence, but a list of keywords likely to be in dialogue or scene descriptions''')

# Essentially just: prompt | llm | StrOutputParser() | retriever
script_retriever_chain = create_history_aware_retriever(
    llm = llm,
    retriever = script_db.as_retriever(),
    prompt = script_retriever_prompt,
)

# Manual checks:
# script_retriever_prompt.format_messages(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Do you know the tragedy of Darth Plagueis the Wise?'
# )
# script_retriever_chain.invoke(dict(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Luke cloud city'
# ))


# Wookieepedia: same shape, but the llm is asked for a simple wiki-style query instead of script keywords
woo_retriever_prompt = _history_aware_query_prompt('Given the above conversation, generate a search query to find a relevant page in the Star Wars fandom wiki; the query should be something simple, such as the name of a character, place, event, item, etc.')

# Essentially just: prompt | llm | StrOutputParser() | retriever
woo_retriever_chain = create_history_aware_retriever(
    llm = llm,
    retriever = woo_db.as_retriever(),
    prompt = woo_retriever_prompt,
)

# Manual checks:
# woo_retriever_prompt.format_messages(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Do you know the tragedy of Darth Plagueis the Wise?'
# )
# woo_retriever_chain.invoke(dict(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Do you know the tragedy of Darth Plagueis the Wise?'
# ))
"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input': \"Who participates in Han's rescue from Jabba? And where is the palace?\",\n", " 'context': [Document(page_content=\"After the smuggler Han Solo failed to repay him for lost cargo, Jabba placed a high price on his head. Solo was eventually delivered to him by one of his bounty hunters, Boba Fett, as a gift from Darth Vader. However, this capture brought him to the attention of Jedi Knight Luke Skywalker, who sought to rescue his friend from Jabba's imprisonment. As he attempted to execute the Jedi and his allies in the Great Pit of Carkoon, Jabba was choked to death by Leia Organa. With the Hutts unable to decide who would inherit Jabba's criminal ventures, many of his slaves, including the Niktos, were free, and his palace was occupied by his former Majordomo Bib Fortuna, who took his place as Daimyo of Tatooine until Fett killed and usurped him. Fett sought to rebuild Jabba's criminal empire in his own image, intending to rule with respect rather than the fear that the Hutt instilled in his followers.\", metadata={'source': 'wookieepedia\\\\Jabba_Desilijic_Tiure.txt'}),\n", " Document(page_content=\"On a distant spaceport, Mira manages to escape from the stormtroopers who were guarding her and sneak into the Millennium Falcon, which was being loading with spice. Han Solo and Chewbacca are going to make another Kessel Run for Jabba the Hutt, but first he was going to pick up Luke Skywalker. On the hidden Rebel base on the planet Randa, Han picks up Luke, Artoo and Threepio and finds his unexpected guest. 
Mira tells Luke and Han about Bzorn's plan and asks Han to deliver her to the professor, knowing that he will stop the destruction after he learns that Mira is safe.\", metadata={'source': 'wookieepedia\\\\The_Second_Kessel_Run.txt'}),\n", " Document(page_content=\"Djarin takes the Darksaber while Gideon is taken into New Republic custody, though he is broken out en route to trial.\\nAs Bo-Katan Kryze returned to her forces without the Darksaber, her Mandalorians abandon her and become privateers under Axe Woves.\\nThe Jedi Luke Skywalker beckons Grogu to train as a Jedi with him. Removing his Mandalorian helmet, Djarin gives his blessing to Grogu before letting the latter go.\\nLuke Skywalker builds a Jedi Temple on Ossus. After a brief time with Skywalker there, Grogu decides to return to Din Djarin. Jabba's Palace under new management \\nBoba Fett and Fennec Shand attack Jabba's Palace, replacing Bib Fortuna's place as master of the palace.\\nA gang war erupts on Tatooine, with Daimyo Boba Fett's Fett gotra defeating the Pyke Syndicate presence on the world and emerging as its leading criminal syndicate.\\nc. 9 ABY\", metadata={'source': 'wookieepedia\\\\Timeline_of_galactic_history.txt'}),\n", " Document(page_content=\"At the chancellor's request, Windu dispatched Kenobi and Skywalker on a mission to save Jabba's son. \\nSoon afterward Windu received a message from the Advanced Recon Force Scout Troopers who tracked Rotta's location to an old monastery on the planet Teth. Due to the monastery's heavy fortification, Windu instructed the troopers to stand by and await Republic reinforcements. He then reassigned three of Admiral Wurtz's cruisers to the rescue operation. In spite of Count Dooku's plot to form an alliance between the Confederacy and the Hutt Clan, Rotta was ultimately redeemed from captivity by Anakin Skywalker and his new Padawan, Ahsoka Tano. 
# Retrieval-augmented chain: the retriever chain supplies the 'context' documents that the document chain answers from.
# Swap in script_retriever_chain as the retriever to answer from the film scripts instead of Wookieepedia.
full_chain = create_retrieval_chain(
    retriever = woo_retriever_chain,
    combine_docs_chain = document_chain,
)

# Manual check (chat_history may be omitted):
# full_chain.invoke(dict(
#     # chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = "Who participates in Han's rescue from Jabba? And where is the palace?"
# ))
# Could use Tavily as a generic search engine for a retriever agent as in the docs, but want more specific (if limited) capabilities here

# Tool over the film-script database; each call returns the 4 closest chunks
script_tool = create_retriever_tool(
    retriever = script_db.as_retriever(search_kwargs = {'k': 4}),
    name = 'search_film_scripts',
    description = '''Search the Star Wars film scripts. This tool should be the first choice for Star Wars related questions.
    Queries passed to this tool should be lists of keywords likely to be in dialogue or scene descriptions, and should not include film titles.''',
)

# Tool over the Wookieepedia database; each call returns the 4 closest chunks
woo_tool = create_retriever_tool(
    retriever = woo_db.as_retriever(search_kwargs = {'k': 4}),
    name = 'search_wookieepedia',
    description = 'Search the Star Wars fandom wiki. This tool should be the first choice for Star Wars related questions.',
    # Alternative description, currently unused:
    # This tool should be used for queries about details of a particular character, location, event, weapon, etc., and the query should be something simple, such as the name of a character, place, event, item, etc.'''
)

tools = [script_tool, woo_tool]
DEATH STAR - DOCKING BAY\n", "\n", "Threepio and Artoo-Detoo are in the center of the Death Star's \n", "Imperial docking bay.\n", "\n", "THREEPIO\n", "Come on, Artoo, we're going!\n", "\n", "Threepio ducks out of sight as the seven stormtroopers who \n", "were guarding the starship rush past them heading towards \n", "Ben and The Sith Knight. He pulls on Artoo.\n", "\n", "INT. DEATH STAR - HALLWAY\n", "\n", "Solo, Chewie, Luke, and Leia tensely watch the duel. The \n", "troops rush toward the battling knights.\n", "\n", "HAN\n", "Now's our chance! Go!\n", "\n", "They start for the Millennium Falcon. Ben sees the troops \n", "charging toward him and realizes that he is trapped. Vader \n", "takes advantage of Ben's momentary distraction and brings \n", "his mighty lightsaber down on the old man. Ben manages to \n", "deflect the blow and swiftly turns around.\n", "\n", "The old Jedi Knight looks over his shoulder at Luke, lifts \n", "his sword from Vader's then watches his opponent with a serene \n", "look on his face.\n", "\n", "LUKE\n", "Hurry up! Come with me! What are you \n", "waiting for?! Get in gear!\n", "\n", "The robot scoots around in a tight circle, stops short, and \n", "smoke begins to pour out of every joint. Luke throws his \n", "arms up in disgust. Exasperated, the young farm boy jumps \n", "into his Landspeeder leaving the smoldering robot to hum \n", "madly.\n", "\n", "INT. REBEL BLOCKADE RUNNER - MAIN HALLWAY\n", "\n", "The awesome, seven-foot-tall Dark Lord of the Sith makes his \n", "way into the blinding light of the main passageway. This is \n", "Darth Vader, right hand of the Emperor. His face is obscured \n", "by his flowing black robes and grotesque breath mask, which \n", "stands out next to the fascist white armored suits of the \n", "Imperial stormtroopers. Everyone instinctively backs away \n", "from the imposing warrior and a deathly quiet sweeps through \n", "the Rebel troops. 
Several of the Rebel troops break and run \n", "in a frenzied panic.\n", "\n", "INT. REBEL BLOCKADE RUNNER\n", "\n", "A woman's hand puts a card into an opening in Artoo's dome.\n", "Artoo makes beeping sounds.\n", "\n", "47INT DEATH STAR - CORRIDOR TO DOCKING BAY\n", "\n", "Lord Vader strides down the hallway, accompanied by a very nervous \n", "Death Star commander.\n", "\n", "48INT DOCKING BAY - DEATH STAR\n", "\n", "Thousands of Imperial troops in tight formation fill the mammoth \n", "docking bay. Vader and the officer walk to the landing platform, where \n", "the shuttle is coming to rest.\n", "\n", "The shuttle's ramp lowers and the Emperor's Royal Guards come out and \n", "create a lethal perimeter. The assembled troops move to rigid attention \n", "with a momentous SNAP.\n", "\n", "Then, in the huge SILENCE which follows, the EMPEROR appears. He is a \n", "rather small, shriveled old man. His bent frame slowly makes its way \n", "down the ramp with the aid of a gnarled cane. He wears a hooded cloak \n", "similar to the one Ben wears, except that it is black. The Emperor's \n", "face is shrouded and difficult to see, except for his piercing yellow \n", "eyes. Commander Jerjerrod and Darth Vader kneel to him. The Supreme \n", "Ruler of the galaxy beckons to the Dark Lord.\u001b[0m\u001b[32;1m\u001b[1;3mThe dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. Unfortunately, Darth Plagueis was betrayed and killed by his own apprentice, who was seeking to obtain his power. 
This story plays a significant role in Anakin's fall to the dark side as he becomes intrigued by the idea of cheating death, ultimately leading him to become Darth Vader under the influence of Palpatine.\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { "data": { "text/plain": [ "{'chat_history': [HumanMessage(content='Are you knowledgeable about Star Wars?'),\n", " AIMessage(content='Very')],\n", " 'input': 'Do you know the tragedy of Darth Plagueis the Wise?',\n", " 'output': 'The dialogue from the Star Wars film scripts does not directly mention the tragedy of Darth Plagueis the Wise. However, in \"Star Wars: Episode III - Revenge of the Sith,\" Chancellor Palpatine tells Anakin Skywalker the story of Darth Plagueis the Wise. According to Palpatine, Darth Plagueis was a Dark Lord of the Sith who was so powerful and wise that he could influence the midi-chlorians to create life and prevent death. Unfortunately, Darth Plagueis was betrayed and killed by his own apprentice, who was seeking to obtain his power. 
# Agent - https://python.langchain.com/docs/modules/agents/
# The agent design pattern is both simpler and better than manual chains since it can make its own choice between tools

# NOTE: the leading and trailing newlines of the triple-quoted string are part of the system message
agent_system_text = '''
You are a helpful agent who is very knowledgeable about Star Wars and your job is to answer questions about its plot, characters, etc.
Use the context provided in the exchanges to produce your answers with as much detail as possible.
If you do not know an answer, say so; do not make up information.
'''

agent_prompt = ChatPromptTemplate.from_messages([
    ('system', agent_system_text),
    MessagesPlaceholder('chat_history', optional = True), # Optional so that the agent can also be invoked without any prior conversation
    ('human', '{input}'),
    ('placeholder', '{agent_scratchpad}') # Required by create_tool_calling_agent: receives the agent's intermediate tool calls and their results (not the chat history)
])

agent = create_tool_calling_agent(llm, tools, agent_prompt)
agent_executor = AgentExecutor(agent = agent, tools = tools, verbose = True) # verbose = True prints each tool invocation and what it returned


# Examples (left commented out so that re-running the notebook does not call the LLM):

# agent_prompt.format_messages(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Do you know the tragedy of Darth Plagueis the Wise?'
# )

# agent_executor.invoke(dict(
#     chat_history = [HumanMessage(content = 'Are you knowledgeable about Star Wars?'), AIMessage(content = 'Very')],
#     input = 'Do you know the tragedy of Darth Plagueis the Wise?'
# ))
# Determine which retriever is best and generate an appropriate query for it

# Pydantic models declare their fields as class-level annotations rather than assigning them in __init__
# Separately, the advantage of using a class over a dictionary is simply the descriptions
# NOTE: the class docstring and the Field descriptions are part of the schema handed to with_structured_output,
# so they act as prompt text rather than as mere documentation - edit them with the same care as a prompt
class DirectedQuery(BaseModel):
    '''Determine whether a query is best answered by looking at scripts rather than articles'''

    query: str = Field(
        ...,
        description = '''The query to either search film scripts or wiki articles.
        A film script query should include character names and relevant keywords of what they are saying in a scene which is likely to contain the required information.
        A wiki articles search should instead be at most 4 words, simply being the name of a character or location or event whose page is likely to contain the required information.''',
    )
    source: str = Field(
        ...,
        description = 'Either "wiki" or "scripts", indicating which source the query should be passed to.',
    )


query_analyser_prompt = ChatPromptTemplate.from_messages([
    ('system', 'You have the ability to issue search queries of one of two kinds to get information to help answer questions.'),
    ('human', '{question}'),
])
structured_llm = llm.with_structured_output(DirectedQuery) # The LLM output is returned as a DirectedQuery instance
query_generator = dict(question = RunnablePassthrough()) | query_analyser_prompt | structured_llm

# Keys must match the values that DirectedQuery.source is described as taking
retrievers = dict(
    wiki = woo_db.as_retriever(search_kwargs = dict(k = 4)),
    scripts = script_db.as_retriever(search_kwargs = dict(k = 4)),
)

@chain
def compound_retriever(question):
    '''Route a question to the wiki or script retriever and return the retrieved documents.

    Args:
        question: Either the question text itself, or a dict holding it under the 'input' key.
            The dict form is what create_retrieval_chain passes to a retriever which is not a
            BaseRetriever subclass (as is the case for this @chain-decorated function).

    Returns:
        Whatever the selected retriever returns for the LLM-generated query.

    Raises:
        KeyError: If a dict without an 'input' key is passed, or if the LLM names a source
            which has no entry in `retrievers`.
    '''
    # Unwrap the dict form, otherwise the repr of the whole dict ends up in the prompt as the "question"
    if isinstance(question, dict):
        question = question['input']

    response = query_generator.invoke(question)

    # The source is free text produced by the LLM, so tolerate casing / whitespace differences
    source = response.source.strip().lower()
    if source not in retrievers:
        raise KeyError(f'Unknown source {response.source!r}; expected one of {sorted(retrievers)}')

    return retrievers[source].invoke(response.query)


# compound_retriever.invoke('Do you know the tragedy of Darth Plagueis the Wise?')
# Pair the routing retriever with the answer-generating document chain.
# Judging by the saved output, invoking the chain with a dict holding an 'input' key
# yields a dict with 'input', 'context' (the retrieved documents) and 'answer'.
compound_chain = create_retrieval_chain(
    retriever = compound_retriever,
    combine_docs_chain = document_chain,
)

# compound_chain.invoke(dict(input = 'Do you know the tragedy of Darth Plagueis the Wise?'))