Spaces:
Sleeping
Sleeping
feat: Enhance application signal handling and update dependencies
Browse files
- Added signal handling for graceful shutdown on SIGINT (Ctrl+C) in `app.py`.
- Introduced `db_persist` configuration option in the `Configuration` class to manage database persistence.
- Updated `DatastoreManager` to utilize `pathvalidate` for sanitizing file paths and filenames.
- Included `pathvalidate` as a new dependency in `pyproject.toml` and updated `uv.lock` accordingly.
- app.py +7 -1
- pstuts_rag/pstuts_rag/configuration.py +2 -0
- pstuts_rag/pstuts_rag/datastore.py +35 -1
- pyproject.toml +1 -0
- uv.lock +11 -0
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from pstuts_rag.configuration import Configuration
|
| 2 |
import asyncio
|
| 3 |
from typing import cast
|
|
|
|
| 4 |
|
| 5 |
import chainlit as cl
|
| 6 |
from dotenv import load_dotenv
|
|
@@ -299,5 +300,10 @@ async def main(input_message: cl.Message):
|
|
| 299 |
|
| 300 |
|
| 301 |
if __name__ == "__main__":
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
pass
|
|
|
|
| 1 |
from pstuts_rag.configuration import Configuration
|
| 2 |
import asyncio
|
| 3 |
from typing import cast
|
| 4 |
+
import signal
|
| 5 |
|
| 6 |
import chainlit as cl
|
| 7 |
from dotenv import load_dotenv
|
|
|
|
| 300 |
|
| 301 |
|
| 302 |
if __name__ == "__main__":
|
| 303 |
+
|
| 304 |
+
def handle_sigint(signum, frame):
|
| 305 |
+
print("SIGINT received (Ctrl+C), exiting...")
|
| 306 |
+
sys.exit(0)
|
| 307 |
+
|
| 308 |
+
signal.signal(signal.SIGINT, handle_sigint)
|
| 309 |
pass
|
pstuts_rag/pstuts_rag/configuration.py
CHANGED
|
@@ -70,6 +70,8 @@ class Configuration:
|
|
| 70 |
|
| 71 |
search_permission: str = str(os.environ.get("EVA_SEARCH_PERMISSION", "no"))
|
| 72 |
|
|
|
|
|
|
|
| 73 |
thread_id: str = ""
|
| 74 |
|
| 75 |
@classmethod
|
|
|
|
| 70 |
|
| 71 |
search_permission: str = str(os.environ.get("EVA_SEARCH_PERMISSION", "no"))
|
| 72 |
|
| 73 |
+
db_persist: str | None = os.environ.get("EVA_DB_PERSIST", None)
|
| 74 |
+
|
| 75 |
thread_id: str = ""
|
| 76 |
|
| 77 |
@classmethod
|
pstuts_rag/pstuts_rag/datastore.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import asyncio
|
|
|
|
| 2 |
import json
|
| 3 |
import glob
|
| 4 |
import aiofiles
|
|
@@ -24,6 +25,7 @@ from qdrant_client import QdrantClient
|
|
| 24 |
from qdrant_client.http.models import Distance, VectorParams
|
| 25 |
from qdrant_client.models import PointStruct
|
| 26 |
from pstuts_rag.utils import get_embeddings_api, flatten, batch
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
class DatastoreManager:
|
|
@@ -56,7 +58,7 @@ class DatastoreManager:
|
|
| 56 |
def __init__(
|
| 57 |
self,
|
| 58 |
embeddings: Optional[Embeddings] = None,
|
| 59 |
-
qdrant_client: QdrantClient =
|
| 60 |
name: str = str(object=uuid.uuid4()),
|
| 61 |
config: Configuration = Configuration(),
|
| 62 |
) -> None:
|
|
@@ -76,7 +78,39 @@ class DatastoreManager:
|
|
| 76 |
self.embeddings = embeddings
|
| 77 |
|
| 78 |
self.name = name if name else config.eva_workflow_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
self.qdrant_client = qdrant_client
|
|
|
|
|
|
|
| 80 |
self.loading_complete = asyncio.Event()
|
| 81 |
self._completion_callbacks = []
|
| 82 |
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import atexit
|
| 3 |
import json
|
| 4 |
import glob
|
| 5 |
import aiofiles
|
|
|
|
| 25 |
from qdrant_client.http.models import Distance, VectorParams
|
| 26 |
from qdrant_client.models import PointStruct
|
| 27 |
from pstuts_rag.utils import get_embeddings_api, flatten, batch
|
| 28 |
+
from pathvalidate import sanitize_filename, sanitize_filepath
|
| 29 |
|
| 30 |
|
| 31 |
class DatastoreManager:
|
|
|
|
| 58 |
def __init__(
|
| 59 |
self,
|
| 60 |
embeddings: Optional[Embeddings] = None,
|
| 61 |
+
qdrant_client: QdrantClient | None = None,
|
| 62 |
name: str = str(object=uuid.uuid4()),
|
| 63 |
config: Configuration = Configuration(),
|
| 64 |
) -> None:
|
|
|
|
| 78 |
self.embeddings = embeddings
|
| 79 |
|
| 80 |
self.name = name if name else config.eva_workflow_name
|
| 81 |
+
|
| 82 |
+
if qdrant_client is None:
|
| 83 |
+
|
| 84 |
+
try:
|
| 85 |
+
if (
|
| 86 |
+
config.db_persist
|
| 87 |
+
and isinstance(config.db_persist, str)
|
| 88 |
+
and len(config.db_persist) > 0
|
| 89 |
+
):
|
| 90 |
+
qdrant_path = Path(
|
| 91 |
+
sanitize_filepath(config.db_persist)
|
| 92 |
+
) / sanitize_filename(config.embedding_model)
|
| 93 |
+
logging.info(
|
| 94 |
+
"Persisting the datastore to: %s",
|
| 95 |
+
str(qdrant_path),
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
qdrant_path.mkdir(parents=True, exist_ok=True)
|
| 99 |
+
|
| 100 |
+
qdrant_client = QdrantClient(path=str(qdrant_path))
|
| 101 |
+
except (OSError, ValueError) as e:
|
| 102 |
+
logging.error(
|
| 103 |
+
"Persistence aborted, exception occurred: %s: %s",
|
| 104 |
+
type(e).__name__,
|
| 105 |
+
str(e),
|
| 106 |
+
)
|
| 107 |
+
finally:
|
| 108 |
+
if qdrant_client is None:
|
| 109 |
+
qdrant_client = QdrantClient(location=":memory:")
|
| 110 |
+
|
| 111 |
self.qdrant_client = qdrant_client
|
| 112 |
+
atexit.register(qdrant_client.close)
|
| 113 |
+
|
| 114 |
self.loading_complete = asyncio.Event()
|
| 115 |
self._completion_callbacks = []
|
| 116 |
|
pyproject.toml
CHANGED
|
@@ -51,6 +51,7 @@ dependencies = [
|
|
| 51 |
"langgraph-cli[inmem]>=0.1.55",
|
| 52 |
"langchain-tavily>=0.2.0",
|
| 53 |
"beautifulsoup4>=4.13.4",
|
|
|
|
| 54 |
]
|
| 55 |
authors = [{ name = "Marko Budisic", email = "mbudisic@gmail.com" }]
|
| 56 |
license = "MIT"
|
|
|
|
| 51 |
"langgraph-cli[inmem]>=0.1.55",
|
| 52 |
"langchain-tavily>=0.2.0",
|
| 53 |
"beautifulsoup4>=4.13.4",
|
| 54 |
+
"pathvalidate>=3.2.3",
|
| 55 |
]
|
| 56 |
authors = [{ name = "Marko Budisic", email = "mbudisic@gmail.com" }]
|
| 57 |
license = "MIT"
|
uv.lock
CHANGED
|
@@ -3509,6 +3509,15 @@ wheels = [
|
|
| 3509 |
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
|
| 3510 |
]
|
| 3511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3512 |
[[package]]
|
| 3513 |
name = "pexpect"
|
| 3514 |
version = "4.9.0"
|
|
@@ -3766,6 +3775,7 @@ dependencies = [
|
|
| 3766 |
{ name = "nest-asyncio" },
|
| 3767 |
{ name = "numpy" },
|
| 3768 |
{ name = "pandas" },
|
|
|
|
| 3769 |
{ name = "pyarrow" },
|
| 3770 |
{ name = "python-dotenv" },
|
| 3771 |
{ name = "qdrant-client" },
|
|
@@ -3838,6 +3848,7 @@ requires-dist = [
|
|
| 3838 |
{ name = "nest-asyncio", specifier = ">=1.5.6" },
|
| 3839 |
{ name = "numpy", specifier = ">=2.2.2" },
|
| 3840 |
{ name = "pandas", specifier = ">=2.0.0" },
|
|
|
|
| 3841 |
{ name = "pyarrow", specifier = ">=19.0.0" },
|
| 3842 |
{ name = "pylint-venv", marker = "extra == 'dev'", specifier = ">=3.0.4" },
|
| 3843 |
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },
|
|
|
|
| 3509 |
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
|
| 3510 |
]
|
| 3511 |
|
| 3512 |
+
[[package]]
|
| 3513 |
+
name = "pathvalidate"
|
| 3514 |
+
version = "3.2.3"
|
| 3515 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3516 |
+
sdist = { url = "https://files.pythonhosted.org/packages/92/87/c7a2f51cc62df0495acb0ed2533a7c74cc895e569a1b020ee5f6e9fa4e21/pathvalidate-3.2.3.tar.gz", hash = "sha256:59b5b9278e30382d6d213497623043ebe63f10e29055be4419a9c04c721739cb", size = 61717 }
|
| 3517 |
+
wheels = [
|
| 3518 |
+
{ url = "https://files.pythonhosted.org/packages/50/14/c5a0e1a947909810fc4c043b84cac472b70e438148d34f5393be1bac663f/pathvalidate-3.2.3-py3-none-any.whl", hash = "sha256:5eaf0562e345d4b6d0c0239d0f690c3bd84d2a9a3c4c73b99ea667401b27bee1", size = 24130 },
|
| 3519 |
+
]
|
| 3520 |
+
|
| 3521 |
[[package]]
|
| 3522 |
name = "pexpect"
|
| 3523 |
version = "4.9.0"
|
|
|
|
| 3775 |
{ name = "nest-asyncio" },
|
| 3776 |
{ name = "numpy" },
|
| 3777 |
{ name = "pandas" },
|
| 3778 |
+
{ name = "pathvalidate" },
|
| 3779 |
{ name = "pyarrow" },
|
| 3780 |
{ name = "python-dotenv" },
|
| 3781 |
{ name = "qdrant-client" },
|
|
|
|
| 3848 |
{ name = "nest-asyncio", specifier = ">=1.5.6" },
|
| 3849 |
{ name = "numpy", specifier = ">=2.2.2" },
|
| 3850 |
{ name = "pandas", specifier = ">=2.0.0" },
|
| 3851 |
+
{ name = "pathvalidate", specifier = ">=3.2.3" },
|
| 3852 |
{ name = "pyarrow", specifier = ">=19.0.0" },
|
| 3853 |
{ name = "pylint-venv", marker = "extra == 'dev'", specifier = ">=3.0.4" },
|
| 3854 |
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },
|