mbudisic committed on
Commit
be858e2
·
1 Parent(s): 6a9e2f3

feat: Enhance application signal handling and update dependencies

Browse files

- Added signal handling for graceful shutdown on SIGINT (Ctrl+C) in `app.py`.
- Introduced the `db_persist` configuration option in the `Configuration` class (read from the `EVA_DB_PERSIST` environment variable, unset by default) to manage database persistence.
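- Updated `DatastoreManager` to create its own Qdrant client when none is passed in: if `db_persist` is set, the datastore is persisted on disk under a path sanitized with `pathvalidate` (file path and filename), falling back to an in-memory client on error; the client is closed at interpreter exit.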
- Included `pathvalidate` as a new dependency in `pyproject.toml` and updated `uv.lock` accordingly.

app.py CHANGED
@@ -1,6 +1,7 @@
1
  from pstuts_rag.configuration import Configuration
2
  import asyncio
3
  from typing import cast
 
4
 
5
  import chainlit as cl
6
  from dotenv import load_dotenv
@@ -299,5 +300,10 @@ async def main(input_message: cl.Message):
299
 
300
 
301
  if __name__ == "__main__":
302
- # This will start the Chainlit app when run directly
 
 
 
 
 
303
  pass
 
1
  from pstuts_rag.configuration import Configuration
2
  import asyncio
3
  from typing import cast
4
+ import signal
5
 
6
  import chainlit as cl
7
  from dotenv import load_dotenv
 
300
 
301
 
302
  if __name__ == "__main__":
303
+
304
+ def handle_sigint(signum, frame):
305
+ print("SIGINT received (Ctrl+C), exiting...")
306
+ sys.exit(0)
307
+
308
+ signal.signal(signal.SIGINT, handle_sigint)
309
  pass
pstuts_rag/pstuts_rag/configuration.py CHANGED
@@ -70,6 +70,8 @@ class Configuration:
70
 
71
  search_permission: str = str(os.environ.get("EVA_SEARCH_PERMISSION", "no"))
72
 
 
 
73
  thread_id: str = ""
74
 
75
  @classmethod
 
70
 
71
  search_permission: str = str(os.environ.get("EVA_SEARCH_PERMISSION", "no"))
72
 
73
+ db_persist: str | None = os.environ.get("EVA_DB_PERSIST", None)
74
+
75
  thread_id: str = ""
76
 
77
  @classmethod
pstuts_rag/pstuts_rag/datastore.py CHANGED
@@ -1,4 +1,5 @@
1
  import asyncio
 
2
  import json
3
  import glob
4
  import aiofiles
@@ -24,6 +25,7 @@ from qdrant_client import QdrantClient
24
  from qdrant_client.http.models import Distance, VectorParams
25
  from qdrant_client.models import PointStruct
26
  from pstuts_rag.utils import get_embeddings_api, flatten, batch
 
27
 
28
 
29
  class DatastoreManager:
@@ -56,7 +58,7 @@ class DatastoreManager:
56
  def __init__(
57
  self,
58
  embeddings: Optional[Embeddings] = None,
59
- qdrant_client: QdrantClient = QdrantClient(location=":memory:"),
60
  name: str = str(object=uuid.uuid4()),
61
  config: Configuration = Configuration(),
62
  ) -> None:
@@ -76,7 +78,39 @@ class DatastoreManager:
76
  self.embeddings = embeddings
77
 
78
  self.name = name if name else config.eva_workflow_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  self.qdrant_client = qdrant_client
 
 
80
  self.loading_complete = asyncio.Event()
81
  self._completion_callbacks = []
82
 
 
1
  import asyncio
2
+ import atexit
3
  import json
4
  import glob
5
  import aiofiles
 
25
  from qdrant_client.http.models import Distance, VectorParams
26
  from qdrant_client.models import PointStruct
27
  from pstuts_rag.utils import get_embeddings_api, flatten, batch
28
+ from pathvalidate import sanitize_filename, sanitize_filepath
29
 
30
 
31
  class DatastoreManager:
 
58
  def __init__(
59
  self,
60
  embeddings: Optional[Embeddings] = None,
61
+ qdrant_client: QdrantClient | None = None,
62
  name: str = str(object=uuid.uuid4()),
63
  config: Configuration = Configuration(),
64
  ) -> None:
 
78
  self.embeddings = embeddings
79
 
80
  self.name = name if name else config.eva_workflow_name
81
+
82
+ if qdrant_client is None:
83
+
84
+ try:
85
+ if (
86
+ config.db_persist
87
+ and isinstance(config.db_persist, str)
88
+ and len(config.db_persist) > 0
89
+ ):
90
+ qdrant_path = Path(
91
+ sanitize_filepath(config.db_persist)
92
+ ) / sanitize_filename(config.embedding_model)
93
+ logging.info(
94
+ "Persisting the datastore to: %s",
95
+ str(qdrant_path),
96
+ )
97
+
98
+ qdrant_path.mkdir(parents=True, exist_ok=True)
99
+
100
+ qdrant_client = QdrantClient(path=str(qdrant_path))
101
+ except (OSError, ValueError) as e:
102
+ logging.error(
103
+ "Persistence aborted, exception occurred: %s: %s",
104
+ type(e).__name__,
105
+ str(e),
106
+ )
107
+ finally:
108
+ if qdrant_client is None:
109
+ qdrant_client = QdrantClient(location=":memory:")
110
+
111
  self.qdrant_client = qdrant_client
112
+ atexit.register(qdrant_client.close)
113
+
114
  self.loading_complete = asyncio.Event()
115
  self._completion_callbacks = []
116
 
pyproject.toml CHANGED
@@ -51,6 +51,7 @@ dependencies = [
51
  "langgraph-cli[inmem]>=0.1.55",
52
  "langchain-tavily>=0.2.0",
53
  "beautifulsoup4>=4.13.4",
 
54
  ]
55
  authors = [{ name = "Marko Budisic", email = "mbudisic@gmail.com" }]
56
  license = "MIT"
 
51
  "langgraph-cli[inmem]>=0.1.55",
52
  "langchain-tavily>=0.2.0",
53
  "beautifulsoup4>=4.13.4",
54
+ "pathvalidate>=3.2.3",
55
  ]
56
  authors = [{ name = "Marko Budisic", email = "mbudisic@gmail.com" }]
57
  license = "MIT"
uv.lock CHANGED
@@ -3509,6 +3509,15 @@ wheels = [
3509
  { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
3510
  ]
3511
 
 
 
 
 
 
 
 
 
 
3512
  [[package]]
3513
  name = "pexpect"
3514
  version = "4.9.0"
@@ -3766,6 +3775,7 @@ dependencies = [
3766
  { name = "nest-asyncio" },
3767
  { name = "numpy" },
3768
  { name = "pandas" },
 
3769
  { name = "pyarrow" },
3770
  { name = "python-dotenv" },
3771
  { name = "qdrant-client" },
@@ -3838,6 +3848,7 @@ requires-dist = [
3838
  { name = "nest-asyncio", specifier = ">=1.5.6" },
3839
  { name = "numpy", specifier = ">=2.2.2" },
3840
  { name = "pandas", specifier = ">=2.0.0" },
 
3841
  { name = "pyarrow", specifier = ">=19.0.0" },
3842
  { name = "pylint-venv", marker = "extra == 'dev'", specifier = ">=3.0.4" },
3843
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },
 
3509
  { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
3510
  ]
3511
 
3512
+ [[package]]
3513
+ name = "pathvalidate"
3514
+ version = "3.2.3"
3515
+ source = { registry = "https://pypi.org/simple" }
3516
+ sdist = { url = "https://files.pythonhosted.org/packages/92/87/c7a2f51cc62df0495acb0ed2533a7c74cc895e569a1b020ee5f6e9fa4e21/pathvalidate-3.2.3.tar.gz", hash = "sha256:59b5b9278e30382d6d213497623043ebe63f10e29055be4419a9c04c721739cb", size = 61717 }
3517
+ wheels = [
3518
+ { url = "https://files.pythonhosted.org/packages/50/14/c5a0e1a947909810fc4c043b84cac472b70e438148d34f5393be1bac663f/pathvalidate-3.2.3-py3-none-any.whl", hash = "sha256:5eaf0562e345d4b6d0c0239d0f690c3bd84d2a9a3c4c73b99ea667401b27bee1", size = 24130 },
3519
+ ]
3520
+
3521
  [[package]]
3522
  name = "pexpect"
3523
  version = "4.9.0"
 
3775
  { name = "nest-asyncio" },
3776
  { name = "numpy" },
3777
  { name = "pandas" },
3778
+ { name = "pathvalidate" },
3779
  { name = "pyarrow" },
3780
  { name = "python-dotenv" },
3781
  { name = "qdrant-client" },
 
3848
  { name = "nest-asyncio", specifier = ">=1.5.6" },
3849
  { name = "numpy", specifier = ">=2.2.2" },
3850
  { name = "pandas", specifier = ">=2.0.0" },
3851
+ { name = "pathvalidate", specifier = ">=3.2.3" },
3852
  { name = "pyarrow", specifier = ">=19.0.0" },
3853
  { name = "pylint-venv", marker = "extra == 'dev'", specifier = ">=3.0.4" },
3854
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },