""" embeddings.py Module for processing and storing document embeddings using ChromaDB. """ import os from langchain_openai import OpenAIEmbeddings from langchain_chroma import Chroma from langchain.text_splitter import RecursiveCharacterTextSplitter PERSIST_DIRECTORY = "./chroma_db/courses" def process_documents_with_chroma(documents): """Processes documents and stores embeddings in ChromaDB. Args: documents (list): List of documents to be embedded. Returns: Chroma: Vector store with document embeddings. """ if os.path.exists(PERSIST_DIRECTORY): print("Loading existing embeddings from ChromaDB...") vector_store = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=OpenAIEmbeddings()) else: print("Creating new embeddings and saving to ChromaDB...") text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100) texts = text_splitter.split_documents(documents) embeddings = OpenAIEmbeddings() vector_store = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY) return vector_store