# file: create_vectorstore.py
"""Build a FAISS vectorstore from the Python files under ``SOURCE_CODE_PATH``.

Run as a script. The pipeline is load -> split -> embed -> store, and the
resulting index is written to ``VECTORSTORE_PATH``.
"""

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Directory that is searched recursively for ``*.py`` files to index.
SOURCE_CODE_PATH = "./source_code"
# Location passed to ``FAISS.save_local`` for the finished index.
VECTORSTORE_PATH = "./vectorstore/db_faiss"


def main() -> None:
    """Create a FAISS vectorstore from source code documents and save it.

    Steps:
        1. Load every ``.py`` file found under ``SOURCE_CODE_PATH``.
        2. Split the loaded documents into chunks of 1000 characters with
           an overlap of 100 characters.
        3. Embed the chunks with ``sentence-transformers/all-MiniLM-L6-v2``.
        4. Build the FAISS index and save it to ``VECTORSTORE_PATH``.

    Raises:
        ValueError: If no ``.py`` files were loaded, or if the loaded files
            produced no chunks (for example, every file is empty).
    """
    print("--- Starting Vectorstore Creation ---")

    # 1. Load: Ingest all .py files from the source_code directory.
    print(f"Loading documents from {SOURCE_CODE_PATH}...")
    loader = DirectoryLoader(
        SOURCE_CODE_PATH,
        glob="**/*.py",
        loader_cls=TextLoader,
        # Python source is UTF-8 by default; without an explicit encoding the
        # files are decoded with the platform locale, which can fail on
        # non-ASCII content (e.g. on Windows).
        loader_kwargs={"encoding": "utf-8"},
    )
    documents = loader.load()
    print(f"Loaded {len(documents)} document(s).")
    if not documents:
        raise ValueError(
            f"No .py files found under {SOURCE_CODE_PATH!r}; nothing to index."
        )

    # 2. Split: Break documents into smaller, manageable chunks.
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    texts = text_splitter.split_documents(documents)
    print(f"Split into {len(texts)} chunks.")
    if not texts:
        # Fail here with a clear message rather than handing an empty list to
        # FAISS.from_documents, which is not expected to accept one.
        raise ValueError(
            f"Documents under {SOURCE_CODE_PATH!r} produced no chunks; "
            "nothing to index."
        )

    # 3. Embed: Create the embedding model.
    print("Creating embedding model...")
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2'
    )

    # 4. Store: Create the FAISS vectorstore and save it to disk.
    print("Creating FAISS vectorstore...")
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(VECTORSTORE_PATH)

    print("--- Vectorstore Creation Complete ---")
    print(f"Vectorstore saved at: {VECTORSTORE_PATH}")


if __name__ == "__main__":
    main()