File size: 1,154 Bytes
999f447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
embeddings.py

Module for processing and storing document embeddings using ChromaDB.
"""

import os
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Directory passed to Chroma as ``persist_directory``. The path is relative,
# so it resolves against the current working directory of the process.
PERSIST_DIRECTORY = "./chroma_db/courses"

def process_documents_with_chroma(documents):
    """Load the persisted Chroma vector store, or build it from ``documents``.

    If ``PERSIST_DIRECTORY`` is an existing, non-empty directory, the store
    persisted there is opened and ``documents`` is NOT used. Otherwise the
    documents are split into chunks, embedded with ``OpenAIEmbeddings`` and
    stored in a new Chroma store under ``PERSIST_DIRECTORY``.

    Args:
        documents (list): Documents to split and embed. Only consulted when
            no persisted store is found.

    Returns:
        Chroma: Vector store backed by ``PERSIST_DIRECTORY``.
    """
    # A bare existence check is not enough: an empty directory (or a regular
    # file at that path) holds no persisted data, and loading from it would
    # hand back a store without ever embedding ``documents``.
    has_persisted_store = os.path.isdir(PERSIST_DIRECTORY) and bool(
        os.listdir(PERSIST_DIRECTORY)
    )

    if has_persisted_store:
        print("Loading existing embeddings from ChromaDB...")
        return Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=OpenAIEmbeddings(),
        )

    print("Creating new embeddings and saving to ChromaDB...")
    # Chunks of up to 2000 characters with a 100-character overlap between
    # neighbouring chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(
        texts, embeddings, persist_directory=PERSIST_DIRECTORY
    )