dylanglenister committed on
Commit
b529dd2
·
1 Parent(s): 24ed5c4

FEAT: Starting on medical information repository.

Browse files
src/data/connection.py CHANGED
@@ -16,6 +16,7 @@ class Collections:
16
  SESSION = "sessions"
17
  MEDICAL_RECORDS = "medical_records"
18
  MEDICAL_MEMORY = "medical_memory"
 
19
 
20
  class ActionFailed(Exception):
21
  """
 
16
  SESSION = "sessions"
17
  MEDICAL_RECORDS = "medical_records"
18
  MEDICAL_MEMORY = "medical_memory"
19
+ INFORMATION = "chunks"
20
 
21
  class ActionFailed(Exception):
22
  """
src/data/repositories/information.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/data/repositories/information.py
2
+
3
+ """
4
+ Contains all saved medical reference information to be used with RAG.
5
+ This may need to be renamed.
6
+
7
+ ## Fields
8
+ chunk_id:
9
+ content:
10
+ embedding:
11
+ embedding_model:
12
+ embedding_dim:
13
+ metadata:
14
+ metadata.parent_id:
15
+ metadata.source:
16
+ metadata.task:
17
+ metadata.sequence:
18
+ metadata.total_chunks:
19
+ metadata.content_type:
20
+ metadata.related_chunks:
21
+ metadata.chunk_length:
22
+ metadata.created_timestamp:
23
+ """
24
+
25
+ from pymongo.errors import ConnectionFailure, PyMongoError
26
+
27
+ from src.data.connection import ActionFailed, Collections, get_collection
28
+ from src.models.information import InfoChunk, InfoChunkMetadata
29
+ from src.utils.logger import logger
30
+
31
+
32
+ def get_chunk(
33
+ chunk_id: str,
34
+ *,
35
+ collection_name: str = Collections.INFORMATION
36
+ ) -> InfoChunk:
37
+ try:
38
+ collection = get_collection(collection_name)
39
+ cursor = collection.find(
40
+ {"chunk_id": chunk_id}
41
+ )
42
+
43
+ return InfoChunk.model_validate(cursor[0])
44
+ except (ConnectionFailure, PyMongoError) as e:
45
+ logger().error(f"Database error listing sessions for chunk '{chunk_id}': {e}")
46
+ raise ActionFailed("A database error occurred while retrieving chunk.") from e
47
+
48
+ # TODO Embedding search
49
+
50
+ # TODO Everything else :(
src/models/information.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/models/information.py
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field
6
+
7
+
8
+ class InfoChunkMetadata(BaseModel):
9
+ """Pydantic model for the nested metadata object."""
10
+ parent_id: str
11
+ source: str
12
+ task: str
13
+ sequence: int
14
+ total_chunks: int
15
+ content_type: str
16
+ related_chunks: list[str] | None = None
17
+ chunk_length: int | None = None
18
+ created_timestamp: datetime | None = None
19
+
20
+ model_config = ConfigDict(
21
+ frozen=True,
22
+ from_attributes=True,
23
+ populate_by_name=True
24
+ )
25
+
26
+ class InfoChunk(BaseModel):
27
+ """Pydantic model for the MongoDB collection."""
28
+ chunk_id: str
29
+ content: str
30
+ embedding: list[float]
31
+ embedding_model: str
32
+ embedding_dim: int = Field(..., gt=0)
33
+ metadata: InfoChunkMetadata
34
+
35
+ model_config = ConfigDict(
36
+ frozen=True,
37
+ from_attributes=True,
38
+ populate_by_name=True
39
+ )