# Course-Finder-AI / embeddings.py
# Page metadata captured with the file (not part of the module):
#   author: raghuv-aditya
#   commit: "Create embeddings.py" (999f447, verified)
#   size: 1.15 kB
"""
embeddings.py
Module for processing and storing document embeddings using ChromaDB.
"""
import os
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Directory handed to Chroma as `persist_directory`. Its existence on disk is
# what process_documents_with_chroma() checks to decide between loading a
# stored collection and building a new one.
PERSIST_DIRECTORY = "./chroma_db/courses"
def process_documents_with_chroma(documents):
    """Return a Chroma vector store backed by ``PERSIST_DIRECTORY``.

    If ``PERSIST_DIRECTORY`` does not exist yet, ``documents`` are split into
    chunks (``chunk_size=2000``, ``chunk_overlap=100``) and a new store is
    built from those chunks with ``Chroma.from_documents``. If the path
    already exists, a store is opened on it and ``documents`` is not used.

    Args:
        documents (list): List of documents to be embedded.

    Returns:
        Chroma: Vector store with document embeddings.
    """
    if not os.path.exists(PERSIST_DIRECTORY):
        print("Creating new embeddings and saving to ChromaDB...")
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=100,
        )
        chunks = splitter.split_documents(documents)
        return Chroma.from_documents(
            chunks,
            OpenAIEmbeddings(),
            persist_directory=PERSIST_DIRECTORY,
        )

    # Only the path's existence is checked here; `documents` is not consulted
    # on this branch.
    print("Loading existing embeddings from ChromaDB...")
    return Chroma(
        persist_directory=PERSIST_DIRECTORY,
        embedding_function=OpenAIEmbeddings(),
    )