Gateston Johns
first real commit
9041389
raw
history blame
No virus
3.46 kB
from __future__ import annotations
import dataclasses
import uuid
from typing import Union
import hashlib
import proto.chunk_pb2 as chunk_pb2
from domain.domain_protocol import DomainProtocol
@dataclasses.dataclass(frozen=True)
class DocumentD(DomainProtocol[chunk_pb2.Document]):
    """Frozen domain counterpart of a ``chunk_pb2.Document`` message.

    Holds the three string fields carried by the proto and converts to and
    from it.
    """

    file_path: str
    authors: str
    publish_date: str

    @property
    def id(self) -> str:
        """Return the hex SHA-256 digest of this document's serialized proto.

        The identifier is computed on every access from the bytes produced
        by ``to_proto().SerializeToString()``; nothing is cached.
        """
        serialized = self.to_proto().SerializeToString()
        digest = hashlib.sha256(serialized)
        return digest.hexdigest()

    @classmethod
    def _from_proto(cls, proto: chunk_pb2.Document) -> DocumentD:
        """Build a ``DocumentD`` by copying each field off ``proto``."""
        field_values = {
            "file_path": proto.file_path,
            "authors": proto.authors,
            "publish_date": proto.publish_date,
        }
        return cls(**field_values)

    def to_proto(self) -> chunk_pb2.Document:
        """Return a new ``chunk_pb2.Document`` populated from this object."""
        message_fields = {
            "file_path": self.file_path,
            "authors": self.authors,
            "publish_date": self.publish_date,
        }
        return chunk_pb2.Document(**message_fields)
@dataclasses.dataclass(frozen=True)
class ChunkD(DomainProtocol[chunk_pb2.Chunk]):
    """Frozen domain counterpart of a ``chunk_pb2.Chunk`` message.

    A chunk points at its parent through ``parent_reference``:

    * chunks of type ``CHUNK_TYPE_PAGE`` must reference a ``DocumentD``;
    * chunks of any other type must reference a ``uuid.UUID``, which is
      written to the proto's ``parent_chunk_id`` field.

    ``__post_init__`` enforces this pairing at construction time.
    """

    chunk_text: str
    chunk_type: chunk_pb2.ChunkType
    chunk_index: int
    parent_reference: Union[uuid.UUID, DocumentD]
    # A fresh random UUID is generated when the caller does not supply one.
    chunk_id: uuid.UUID = dataclasses.field(default_factory=uuid.uuid4)

    @property
    def id(self) -> str:
        """Return ``chunk_id`` rendered as a string."""
        return str(self.chunk_id)

    def __post_init__(self) -> None:
        """Validate that ``parent_reference`` matches ``chunk_type``.

        Raises:
            ValueError: if a page chunk's parent is not a ``DocumentD``, or
                if a non-page chunk's parent is not a ``uuid.UUID``.
        """
        if self.chunk_type == chunk_pb2.ChunkType.CHUNK_TYPE_PAGE:
            if not isinstance(self.parent_reference, DocumentD):
                raise ValueError(
                    f"Chunk (id: {self.chunk_id}) with type {self.chunk_type} must have a DocumentD parent_reference."
                )
        elif not isinstance(self.parent_reference, uuid.UUID):
            raise ValueError(
                f"Chunk (id: {self.chunk_id}) with type {self.chunk_type} must have a uuid.UUID parent_reference."
            )

    @classmethod
    def _from_proto(cls, proto: chunk_pb2.Chunk) -> ChunkD:
        """Build a ``ChunkD`` from ``proto``.

        The parent is taken from ``parent_chunk_id`` when that field is set,
        otherwise from ``document``.

        Raises:
            ValueError: if neither ``parent_chunk_id`` nor ``document`` is
                set on ``proto``, if an id string is not a valid UUID, or if
                the resulting parent does not match the chunk type.
        """
        # Resolve the parent first so the constructor call is written once.
        parent_reference: Union[uuid.UUID, DocumentD]
        if proto.HasField('parent_chunk_id'):
            parent_reference = uuid.UUID(proto.parent_chunk_id)
        elif proto.HasField('document'):
            parent_reference = DocumentD._from_proto(proto.document)
        else:
            # Name the field that is actually checked above.
            raise ValueError(
                f"Chunk proto (id: {proto.chunk_id}) has no 'parent_chunk_id' or 'document' field."
            )

        return cls(chunk_id=uuid.UUID(proto.chunk_id),
                   parent_reference=parent_reference,
                   chunk_text=proto.chunk_text,
                   chunk_type=proto.chunk_type,
                   chunk_index=proto.chunk_index)

    def to_proto(self) -> chunk_pb2.Chunk:
        """Return a new ``chunk_pb2.Chunk`` populated from this object.

        A ``uuid.UUID`` parent is stored as a string in ``parent_chunk_id``;
        a ``DocumentD`` parent is copied into ``document``.

        Raises:
            ValueError: if ``parent_reference`` is neither a ``uuid.UUID``
                nor a ``DocumentD``.
        """
        chunk_proto = chunk_pb2.Chunk()
        chunk_proto.chunk_id = str(self.chunk_id)
        chunk_proto.chunk_text = self.chunk_text
        chunk_proto.chunk_type = self.chunk_type
        chunk_proto.chunk_index = self.chunk_index

        if isinstance(self.parent_reference, uuid.UUID):
            chunk_proto.parent_chunk_id = str(self.parent_reference)
        elif isinstance(self.parent_reference, DocumentD):
            # Message-typed fields cannot be assigned directly; copy instead.
            chunk_proto.document.CopyFrom(self.parent_reference.to_proto())
        else:
            raise ValueError(
                f"Chunk (id: {self.chunk_id}) parent_reference is of unknown type: {type(self.parent_reference)}"
            )
        return chunk_proto