Spaces:
Sleeping
Sleeping
| from django.db import models | |
| from django.contrib.postgres.search import SearchVectorField | |
| from django.contrib.postgres.indexes import GinIndex | |
| from django.utils import timezone | |
| import uuid | |
def legal_document_upload_path(instance, filename):
    """Build the relative storage path for an uploaded legal document file.

    The path has the form ``legal_uploads/<code>/<filename>``.

    Args:
        instance: Object exposing a ``code`` attribute. A falsy ``code`` is
            replaced by a random UUID hex string so the file still gets its
            own directory.
        filename: File name used unchanged as the last path segment.

    Returns:
        The relative path as a string with ``/`` separators.
    """
    base = "legal_uploads"
    raw_code = instance.code or uuid.uuid4().hex
    # Flatten both POSIX and Windows separators so the code always maps to
    # exactly one directory level.
    code = raw_code.replace("/", "_").replace("\\", "_")
    # A dot-only code ("." or "..") would act as a relative path segment and
    # move the file out of its per-document directory.
    if code.strip(".") == "":
        code = code.replace(".", "_")
    return f"{base}/{code}/{filename}"
def legal_document_image_upload_path(instance, filename):
    """Build the relative storage path for an image tied to a legal document.

    The path has the form ``legal_images/<code>/<timestamp>_<filename>`` where
    ``<timestamp>`` is ``timezone.now()`` formatted as ``YYYYmmddHHMMSS``.

    Args:
        instance: Object whose ``document`` attribute (if available) exposes a
            ``code``. When the document or its code is missing, a random UUID
            hex string is used for the directory instead.
        filename: File name appended after the timestamp prefix.

    Returns:
        The relative path as a string with ``/`` separators.
    """
    base = "legal_images"
    # Reading an unset non-null ForeignKey raises RelatedObjectDoesNotExist
    # (an AttributeError subclass) rather than returning None, so getattr
    # with a default is what actually makes the fallback reachable.
    document = getattr(instance, "document", None)
    raw_code = (document.code if document else None) or uuid.uuid4().hex
    # Flatten both POSIX and Windows separators so the code always maps to
    # exactly one directory level.
    code = raw_code.replace("/", "_").replace("\\", "_")
    # A dot-only code ("." or "..") would act as a relative path segment.
    if code.strip(".") == "":
        code = code.replace(".", "_")
    timestamp = timezone.now().strftime("%Y%m%d%H%M%S")
    return f"{base}/{code}/{timestamp}_{filename}"
class Procedure(models.Model):
    """Procedure record: descriptive fields plus search and embedding columns."""

    title = models.CharField(max_length=500)
    # Domain label, e.g. "ANTT", "Cư trú", "PCCC", "GT".
    domain = models.CharField(max_length=100, db_index=True)
    # Level label, e.g. "Tỉnh" (province), "Huyện" (district), "Xã" (commune).
    level = models.CharField(max_length=50, blank=True)
    conditions = models.TextField(blank=True)
    dossier = models.TextField(blank=True)
    fee = models.CharField(max_length=200, blank=True)
    duration = models.CharField(max_length=200, blank=True)
    authority = models.CharField(max_length=300, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; presumably a serialized embedding vector - format not shown here.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="procedure_tsv_idx"),
        ]

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (
            self.title,
            self.domain,
            self.level,
            self.conditions,
            self.dossier,
        )
        return " ".join(map(str, filter(None, parts)))
class Fine(models.Model):
    """Fine entry keyed by a unique ``code``, with an optional amount range."""

    code = models.CharField(max_length=50, unique=True)
    name = models.CharField(max_length=500)
    article = models.CharField(max_length=100, blank=True)
    decree = models.CharField(max_length=100, blank=True)

    # Whole-number amounts (no decimal places); currency is not stated here.
    min_fine = models.DecimalField(
        max_digits=12,
        decimal_places=0,
        null=True,
        blank=True,
    )
    max_fine = models.DecimalField(
        max_digits=12,
        decimal_places=0,
        null=True,
        blank=True,
    )

    license_points = models.CharField(max_length=50, blank=True)
    remedial = models.TextField(blank=True)
    source_url = models.URLField(max_length=1000, blank=True)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; presumably a serialized embedding vector - format not shown here.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="fine_tsv_idx"),
        ]

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (
            self.name,
            self.code,
            self.article,
            self.decree,
            self.remedial,
        )
        return " ".join(map(str, filter(None, parts)))
class Office(models.Model):
    """Office/unit record with contact details and optional coordinates."""

    unit_name = models.CharField(max_length=300)
    address = models.CharField(max_length=500, blank=True)
    district = models.CharField(max_length=100, blank=True, db_index=True)
    working_hours = models.CharField(max_length=200, blank=True)
    phone = models.CharField(max_length=100, blank=True)
    email = models.EmailField(blank=True)

    # Optional coordinates; both may be NULL.
    latitude = models.FloatField(null=True, blank=True)
    longitude = models.FloatField(null=True, blank=True)

    service_scope = models.CharField(max_length=300, blank=True)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; presumably a serialized embedding vector - format not shown here.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="office_tsv_idx"),
        ]

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (
            self.unit_name,
            self.address,
            self.district,
            self.service_scope,
        )
        return " ".join(map(str, filter(None, parts)))
class Advisory(models.Model):
    """Advisory item: a title and summary with optional source and date."""

    title = models.CharField(max_length=500)
    summary = models.TextField()
    source_url = models.URLField(max_length=1000, blank=True)
    published_at = models.DateField(null=True, blank=True)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; presumably a serialized embedding vector - format not shown here.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="advisory_tsv_idx"),
        ]

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (self.title, self.summary)
        return " ".join(map(str, filter(None, parts)))
class LegalDocument(models.Model):
    """Metadata and raw text for an authoritative legal document."""

    # Allowed values for ``doc_type`` as (stored value, display label) pairs.
    DOCUMENT_TYPES = [
        ("decision", "Decision"),
        ("circular", "Circular"),
        ("guideline", "Guideline"),
        ("plan", "Plan"),
        ("other", "Other"),
    ]

    # --- identification -----------------------------------------------------
    code = models.CharField(max_length=100, unique=True)
    title = models.CharField(max_length=500)
    doc_type = models.CharField(
        max_length=30,
        choices=DOCUMENT_TYPES,
        default="other",
    )
    summary = models.TextField(blank=True)
    issued_by = models.CharField(max_length=200, blank=True)
    issued_at = models.DateField(null=True, blank=True)

    # --- source file bookkeeping ----------------------------------------------
    source_file = models.CharField(max_length=500, blank=True)
    # Stored under the path produced by ``legal_document_upload_path``.
    uploaded_file = models.FileField(
        upload_to=legal_document_upload_path,
        null=True,
        blank=True,
    )
    original_filename = models.CharField(max_length=255, blank=True)
    mime_type = models.CharField(max_length=120, blank=True)
    file_size = models.BigIntegerField(null=True, blank=True)
    file_checksum = models.CharField(max_length=128, blank=True)
    content_checksum = models.CharField(max_length=128, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    page_count = models.IntegerField(null=True, blank=True)

    # --- content --------------------------------------------------------------
    raw_text = models.TextField()
    raw_text_ocr = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)

    # --- timestamps -----------------------------------------------------------
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_document_tsv_idx"),
            models.Index(fields=["doc_type"]),
            models.Index(fields=["issued_at"]),
        ]
        ordering = ["title"]

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (
            self.title,
            self.code,
            self.summary,
            self.issued_by,
            self.raw_text,
        )
        return " ".join(map(str, filter(None, parts)))
class LegalSection(models.Model):
    """Structured snippet (chapter/section/article/...) of a legal document."""

    # Allowed values for ``level`` as (stored value, display label) pairs.
    LEVEL_CHOICES = [
        ("chapter", "Chapter"),
        ("section", "Section"),
        ("article", "Article"),
        ("clause", "Clause"),
        ("note", "Note"),
        ("other", "Other"),
    ]

    # Deleting the parent document deletes its sections as well.
    document = models.ForeignKey(
        LegalDocument,
        related_name="sections",
        on_delete=models.CASCADE,
    )
    section_code = models.CharField(max_length=120)
    section_title = models.CharField(max_length=500, blank=True)
    level = models.CharField(
        max_length=30,
        choices=LEVEL_CHOICES,
        default="other",
    )
    order = models.PositiveIntegerField(default=0, db_index=True)
    page_start = models.IntegerField(null=True, blank=True)
    page_end = models.IntegerField(null=True, blank=True)
    content = models.TextField()
    excerpt = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)
    is_ocr = models.BooleanField(default=False)

    # Full-text search column (GIN-indexed below); not populated in this class.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; presumably a serialized embedding vector - format not shown here.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_section_tsv_idx"),
            models.Index(fields=["document", "order"]),
            models.Index(fields=["level"]),
        ]
        ordering = ["document", "order"]
        # One row per (document, section_code, order) combination.
        unique_together = ("document", "section_code", "order")

    def search_vector(self) -> str:
        """Return the non-empty searchable fields joined by single spaces.

        The result is a plain string; this method does not write ``tsv_body``.
        """
        parts = (
            self.section_title,
            self.section_code,
            self.content,
            self.excerpt,
        )
        return " ".join(map(str, filter(None, parts)))
class Synonym(models.Model):
    """Pairs a unique ``keyword`` with a single ``alias`` string."""

    # Unique across the table, so each keyword has exactly one row.
    keyword = models.CharField(
        max_length=120,
        unique=True,
    )
    alias = models.CharField(
        max_length=120,
    )
class LegalDocumentImage(models.Model):
    """Metadata for an image extracted from an uploaded legal document."""

    # Deleting the parent document deletes its image rows as well.
    document = models.ForeignKey(
        LegalDocument,
        related_name="images",
        on_delete=models.CASCADE,
    )
    # Stored under the path produced by ``legal_document_image_upload_path``.
    image = models.ImageField(
        upload_to=legal_document_image_upload_path,
    )
    page_number = models.IntegerField(null=True, blank=True)
    description = models.CharField(max_length=255, blank=True)
    width = models.IntegerField(null=True, blank=True)
    height = models.IntegerField(null=True, blank=True)
    checksum = models.CharField(max_length=128, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        indexes = [
            models.Index(fields=["document", "page_number"]),
            models.Index(fields=["checksum"]),
        ]

    def __str__(self) -> str:
        """Return a label built from this row's id and the document's code."""
        # Reads ``self.document``, so the related document must be available.
        return f"Image {self.id} of {self.document.code}"
class IngestionJob(models.Model):
    """Bookkeeping row for a background ingestion task."""

    # Status values stored in ``status``.
    STATUS_PENDING = "pending"
    STATUS_RUNNING = "running"
    STATUS_COMPLETED = "completed"
    STATUS_FAILED = "failed"
    # (stored value, display label) pairs offered for ``status``.
    STATUS_CHOICES = [
        (STATUS_PENDING, "Pending"),
        (STATUS_RUNNING, "Running"),
        (STATUS_COMPLETED, "Completed"),
        (STATUS_FAILED, "Failed"),
    ]

    # UUID primary key generated on the Python side via ``uuid.uuid4``.
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    code = models.CharField(max_length=128)
    filename = models.CharField(max_length=255)
    # Optional link; the job row is kept (link set to NULL) if the document
    # is deleted.
    document = models.ForeignKey(
        LegalDocument,
        on_delete=models.SET_NULL,
        related_name="ingestion_jobs",
        null=True,
        blank=True,
    )
    metadata = models.JSONField(default=dict, blank=True)
    stats = models.JSONField(default=dict, blank=True)
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default=STATUS_PENDING,
    )
    error_message = models.TextField(blank=True)
    storage_path = models.CharField(max_length=512, blank=True)
    # Non-negative integer; the scale (e.g. percent) is not defined here.
    progress = models.PositiveIntegerField(default=0)

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        # Newest jobs first.
        ordering = ("-created_at",)

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Return ``IngestionJob(<code>, <status>)``."""
        return f"IngestionJob({self.code}, {self.status})"
class AuditLog(models.Model):
    """Log row: request details, a status code, and intent/latency values."""

    created_at = models.DateTimeField(auto_now_add=True)

    # Request details.
    ip = models.GenericIPAddressField(null=True, blank=True)
    user_agent = models.CharField(max_length=300, blank=True)
    path = models.CharField(max_length=300)
    query = models.CharField(max_length=500, blank=True)

    # Outcome; ``status`` defaults to 200.
    status = models.IntegerField(default=200)
    intent = models.CharField(max_length=50, blank=True)
    confidence = models.FloatField(null=True, blank=True)
    latency_ms = models.FloatField(null=True, blank=True)
class MLMetrics(models.Model):
    """Metrics row keyed by a unique ``date``."""

    # ``unique=True`` allows at most one row per date.
    date = models.DateField(unique=True)
    total_requests = models.IntegerField(default=0)
    intent_accuracy = models.FloatField(null=True, blank=True)
    average_latency_ms = models.FloatField(null=True, blank=True)
    error_rate = models.FloatField(null=True, blank=True)
    intent_breakdown = models.JSONField(default=dict, blank=True)
    generated_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Most recent date first.
        ordering = ["-date"]
        verbose_name = "ML Metrics"
        verbose_name_plural = "ML Metrics"
class ConversationSession(models.Model):
    """Conversation session used for context management."""

    # Public identifier, generated via ``uuid.uuid4`` and unique per session.
    session_id = models.UUIDField(
        default=uuid.uuid4,
        unique=True,
        editable=False,
    )
    user_id = models.CharField(
        max_length=100,
        null=True,
        blank=True,
        db_index=True,
    )
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Most recently updated sessions first.
        ordering = ["-updated_at"]
        verbose_name = "Conversation Session"
        verbose_name_plural = "Conversation Sessions"
        # NOTE(review): ``session_id`` is already unique, which normally comes
        # with its own index; the explicit index below may be redundant.
        # Confirm before removing it - doing so needs a migration.
        indexes = [
            models.Index(fields=["session_id"]),
            models.Index(fields=["user_id", "-updated_at"]),
        ]

    def __str__(self) -> str:
        """Return ``Session <session_id>``."""
        return f"Session {self.session_id}"
class ConversationMessage(models.Model):
    """Single message belonging to a conversation session."""

    # Allowed values for ``role`` as (stored value, display label) pairs.
    ROLE_CHOICES = [
        ("user", "User"),
        ("bot", "Bot"),
    ]

    # Deleting the session deletes its messages as well.
    session = models.ForeignKey(
        ConversationSession,
        on_delete=models.CASCADE,
        related_name="messages",
    )
    role = models.CharField(max_length=10, choices=ROLE_CHOICES)
    content = models.TextField()
    intent = models.CharField(max_length=50, blank=True, null=True)
    entities = models.JSONField(default=dict, blank=True)
    timestamp = models.DateTimeField(auto_now_add=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Oldest message first.
        ordering = ["timestamp"]
        verbose_name = "Conversation Message"
        verbose_name_plural = "Conversation Messages"
        indexes = [
            models.Index(fields=["session", "timestamp"]),
            models.Index(fields=["session", "role", "timestamp"]),
        ]

    def __str__(self) -> str:
        """Return ``"<role>: <preview>"`` with content cut at 50 characters.

        An ellipsis is appended only when the content was actually truncated,
        so short messages are shown complete and unmarked.
        """
        limit = 50
        preview = self.content[:limit]
        suffix = "..." if len(self.content) > limit else ""
        return f"{self.role}: {preview}{suffix}"