Spaces:
Build error
Build error
import os

# from dotenv import load_dotenv
from chromadb.config import Settings

# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader

# load_dotenv()
# Absolute directory that contains this file (symlinks resolved); the other
# paths below are anchored to it.
ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Folder holding the source documents.
# NOTE(review): presumably what the ingestion script reads -- confirm against caller.
SOURCE_DIRECTORY = os.path.join(ROOT_DIRECTORY, "SOURCE_DOCUMENTS")

# Folder for storing the database (passed to Chroma as its persist directory).
PERSIST_DIRECTORY = os.path.join(ROOT_DIRECTORY, "DB")

# Defaults to the machine's CPU count, falling back to 8 when os.cpu_count()
# returns None. Can be changed to a specific number.
INGEST_THREADS = os.cpu_count() or 8
# Chroma settings: DuckDB + Parquet implementation, persisted under
# PERSIST_DIRECTORY, with anonymized telemetry disabled.
# NOTE(review): `chroma_db_impl` appears to be accepted only by older chromadb
# releases -- confirm against the pinned chromadb version before upgrading.
CHROMA_SETTINGS = Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory=PERSIST_DIRECTORY,
    anonymized_telemetry=False,
)
# https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader
# Maps a lower-case file extension (including the leading dot) to the LangChain
# loader class associated with it.
# NOTE(review): ".doc" is routed to Docx2txtLoader, which may not handle legacy
# binary Word files -- verify with a real .doc sample.
DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".py": TextLoader,
    ".pdf": PDFMinerLoader,
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader,
    ".doc": Docx2txtLoader,
}
# Default Instructor model.
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"

# You can also choose a smaller model; if you do, don't forget to change
# HuggingFaceInstructEmbeddings to HuggingFaceEmbeddings in both ingest.py and
# run_localGPT.py.
# EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"