# Shared configuration constants: filesystem locations, Chroma client
# settings, the extension-to-loader table, and the embedding model name.
import os

from chromadb.config import Settings
from langchain.document_loaders import (
    CSVLoader,
    Docx2txtLoader,
    PDFMinerLoader,
    TextLoader,
    UnstructuredExcelLoader,
)

# Module-level placeholder, initialised to None.
# NOTE(review): nothing in this module reads or writes it afterwards;
# presumably other modules use it -- confirm before removing.
question = None

# Directory that contains this file; the paths below are built from it.
ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Sub-directories of ROOT_DIRECTORY (names suggest input documents and the
# persisted database respectively).
SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"
PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB"

# os.cpu_count() may return None when the count cannot be determined;
# fall back to 8 in that case.
INGEST_THREADS = os.cpu_count() or 8

# Chroma settings: duckdb+parquet implementation, persisted under
# PERSIST_DIRECTORY, with anonymized telemetry switched off.
CHROMA_SETTINGS = Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory=PERSIST_DIRECTORY,
    anonymized_telemetry=False,
)

# File extension (including the leading dot) -> loader class.
# NOTE(review): ".doc" shares Docx2txtLoader with ".docx"; verify that this
# loader can actually read legacy .doc files.
DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".py": TextLoader,
    ".pdf": PDFMinerLoader,
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader,
    ".doc": Docx2txtLoader,
}

# Name of the embedding model (presumably a Hugging Face model id -- confirm
# against the code that consumes it).
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"