Spaces:

davidtran999
/

hue-portal-backend-v2

Sleeping

App Files Files Community

hue-portal-backend-v2 / backend /core /models.py

davidtran999

Upload backend/core/models.py with huggingface_hub

77ffb68 verified 8 days ago

raw

history blame contribute delete

14 kB

	from django.db import models
	from django.contrib.postgres.search import SearchVectorField
	from django.contrib.postgres.indexes import GinIndex
	from django.utils import timezone
	import uuid


	def legal_document_upload_path(instance, filename):
	    """Build the storage path for an uploaded legal document file.

	    The result is ``legal_uploads/<code>/<filename>``, where ``<code>`` is the
	    instance's ``code`` with every ``/`` replaced by ``_``. When ``code`` is
	    empty or ``None`` a random UUID hex string is used instead.
	    """
	    raw_code = instance.code or uuid.uuid4().hex
	    # Slashes in the code would otherwise create extra directory levels.
	    safe_code = raw_code.replace("/", "_")
	    return "{}/{}/{}".format("legal_uploads", safe_code, filename)


	def legal_document_image_upload_path(instance, filename):
	    """Build the storage path for an image extracted from a legal document.

	    The result is ``legal_images/<code>/<timestamp>_<filename>``. ``<code>`` is
	    the parent document's ``code`` with every ``/`` replaced by ``_``; a random
	    UUID hex string is used when no document is attached or its code is empty.
	    ``<timestamp>`` is ``timezone.now()`` formatted as ``%Y%m%d%H%M%S``.
	    """
	    base = "legal_images"
	    # getattr() with a default also covers an unset required ForeignKey:
	    # Django raises RelatedObjectDoesNotExist there, which subclasses
	    # AttributeError, so the UUID fallback below is actually reachable.
	    document = getattr(instance, "document", None)
	    # Treat an empty code like a missing one, matching
	    # legal_document_upload_path, so the path never has an empty segment.
	    raw_code = (document.code if document else "") or uuid.uuid4().hex
	    code = raw_code.replace("/", "_")
	    timestamp = timezone.now().strftime("%Y%m%d%H%M%S")
	    return f"{base}/{code}/{timestamp}_{filename}"

	class Procedure(models.Model):
	    """An administrative procedure: requirements, fee, duration and authority."""

	    title = models.CharField(max_length=500)
	    # Example values from the original note: ANTT / Cư trú / PCCC / GT
	    # (public security and order / residence / fire safety / traffic).
	    domain = models.CharField(max_length=100, db_index=True)
	    # Administrative level; original note lists Tỉnh / Huyện / Xã
	    # (province / district / commune).
	    level = models.CharField(max_length=50, blank=True)
	    conditions = models.TextField(blank=True)
	    dossier = models.TextField(blank=True)
	    fee = models.CharField(max_length=200, blank=True)
	    duration = models.CharField(max_length=200, blank=True)
	    authority = models.CharField(max_length=300, blank=True)
	    source_url = models.URLField(max_length=1000, blank=True)
	    updated_at = models.DateTimeField(auto_now=True)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)
	    # Raw bytes; presumably a serialized embedding vector - format is not
	    # defined in this file, confirm against the code that writes it.
	    embedding = models.BinaryField(null=True, blank=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="procedure_tsv_idx"),
	        ]

	    def __str__(self) -> str:
	        """Return the procedure title for admin lists and debugging output."""
	        return self.title

	    def search_vector(self) -> str:
	        """Return the searchable text for this procedure.

	        Joins the non-empty values of title, domain, level, conditions and
	        dossier with single spaces. The result is a plain string, not a
	        database search vector.
	        """
	        fields = [self.title, self.domain, self.level, self.conditions, self.dossier]
	        return " ".join(str(f) for f in fields if f)

	class Fine(models.Model):
	    """A fine entry identified by a unique code, with its amount range."""

	    code = models.CharField(max_length=50, unique=True)
	    name = models.CharField(max_length=500)
	    article = models.CharField(max_length=100, blank=True)
	    decree = models.CharField(max_length=100, blank=True)
	    # Whole-number amounts (decimal_places=0); either bound may be unknown.
	    min_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
	    max_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
	    license_points = models.CharField(max_length=50, blank=True)
	    remedial = models.TextField(blank=True)
	    source_url = models.URLField(max_length=1000, blank=True)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)
	    # Raw bytes; presumably a serialized embedding vector - confirm format.
	    embedding = models.BinaryField(null=True, blank=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="fine_tsv_idx"),
	        ]

	    def __str__(self) -> str:
	        """Return ``"<code> - <name>"`` for admin lists and debugging output."""
	        return f"{self.code} - {self.name}"

	    def search_vector(self) -> str:
	        """Return the searchable text for this fine.

	        Joins the non-empty values of name, code, article, decree and
	        remedial with single spaces. The result is a plain string.
	        """
	        fields = [self.name, self.code, self.article, self.decree, self.remedial]
	        return " ".join(str(f) for f in fields if f)

	class Office(models.Model):
	    """Contact and location details for an office / unit."""

	    unit_name = models.CharField(max_length=300)
	    address = models.CharField(max_length=500, blank=True)
	    district = models.CharField(max_length=100, blank=True, db_index=True)
	    working_hours = models.CharField(max_length=200, blank=True)
	    phone = models.CharField(max_length=100, blank=True)
	    email = models.EmailField(blank=True)
	    # Coordinates are optional; both are nullable floats.
	    latitude = models.FloatField(null=True, blank=True)
	    longitude = models.FloatField(null=True, blank=True)
	    service_scope = models.CharField(max_length=300, blank=True)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)
	    # Raw bytes; presumably a serialized embedding vector - confirm format.
	    embedding = models.BinaryField(null=True, blank=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="office_tsv_idx"),
	        ]

	    def __str__(self) -> str:
	        """Return the unit name for admin lists and debugging output."""
	        return self.unit_name

	    def search_vector(self) -> str:
	        """Return the searchable text for this office.

	        Joins the non-empty values of unit_name, address, district and
	        service_scope with single spaces. The result is a plain string.
	        """
	        fields = [self.unit_name, self.address, self.district, self.service_scope]
	        return " ".join(str(f) for f in fields if f)

	class Advisory(models.Model):
	    """An advisory notice with a title, summary and optional publish date."""

	    title = models.CharField(max_length=500)
	    summary = models.TextField()
	    source_url = models.URLField(max_length=1000, blank=True)
	    published_at = models.DateField(null=True, blank=True)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)
	    # Raw bytes; presumably a serialized embedding vector - confirm format.
	    embedding = models.BinaryField(null=True, blank=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="advisory_tsv_idx"),
	        ]

	    def __str__(self) -> str:
	        """Return the advisory title for admin lists and debugging output."""
	        return self.title

	    def search_vector(self) -> str:
	        """Return the searchable text for this advisory.

	        Joins the non-empty values of title and summary with a single space.
	        The result is a plain string.
	        """
	        fields = [self.title, self.summary]
	        return " ".join(str(f) for f in fields if f)


	class LegalDocument(models.Model):
	    """Metadata + raw text for authoritative legal documents."""

	    DOCUMENT_TYPES = [
	        ("decision", "Decision"),
	        ("circular", "Circular"),
	        ("guideline", "Guideline"),
	        ("plan", "Plan"),
	        ("other", "Other"),
	    ]

	    # Unique business identifier; also used as the upload directory name.
	    code = models.CharField(max_length=100, unique=True)
	    title = models.CharField(max_length=500)
	    doc_type = models.CharField(max_length=30, choices=DOCUMENT_TYPES, default="other")
	    summary = models.TextField(blank=True)
	    issued_by = models.CharField(max_length=200, blank=True)
	    issued_at = models.DateField(null=True, blank=True)
	    source_file = models.CharField(max_length=500, blank=True)
	    # Stored under legal_uploads/<code>/ via legal_document_upload_path.
	    uploaded_file = models.FileField(upload_to=legal_document_upload_path, null=True, blank=True)
	    original_filename = models.CharField(max_length=255, blank=True)
	    mime_type = models.CharField(max_length=120, blank=True)
	    file_size = models.BigIntegerField(null=True, blank=True)
	    # Checksum algorithm is not defined in this file; 128 chars of room.
	    file_checksum = models.CharField(max_length=128, blank=True)
	    content_checksum = models.CharField(max_length=128, blank=True)
	    source_url = models.URLField(max_length=1000, blank=True)
	    page_count = models.IntegerField(null=True, blank=True)
	    raw_text = models.TextField()
	    raw_text_ocr = models.TextField(blank=True)
	    metadata = models.JSONField(default=dict, blank=True)
	    created_at = models.DateTimeField(auto_now_add=True)
	    updated_at = models.DateTimeField(auto_now=True)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="legal_document_tsv_idx"),
	            models.Index(fields=["doc_type"]),
	            models.Index(fields=["issued_at"]),
	        ]
	        ordering = ["title"]

	    def __str__(self) -> str:
	        """Return ``"<code> - <title>"`` so related-object pickers are readable."""
	        return f"{self.code} - {self.title}"

	    def search_vector(self) -> str:
	        """Return concatenated searchable text.

	        Joins the non-empty values of title, code, summary, issued_by and
	        raw_text with single spaces. ``raw_text_ocr`` is not included.
	        """
	        fields = [
	            self.title,
	            self.code,
	            self.summary,
	            self.issued_by,
	            self.raw_text,
	        ]
	        return " ".join(str(f) for f in fields if f)


	class LegalSection(models.Model):
	    """Structured snippet (chapter/section/article) for each legal document."""

	    LEVEL_CHOICES = [
	        ("chapter", "Chapter"),
	        ("section", "Section"),
	        ("article", "Article"),
	        ("clause", "Clause"),
	        ("note", "Note"),
	        ("other", "Other"),
	    ]

	    # Deleting the parent document deletes its sections (CASCADE).
	    document = models.ForeignKey(
	        LegalDocument,
	        on_delete=models.CASCADE,
	        related_name="sections",
	    )
	    section_code = models.CharField(max_length=120)
	    section_title = models.CharField(max_length=500, blank=True)
	    level = models.CharField(max_length=30, choices=LEVEL_CHOICES, default="other")
	    # Position used by the default ordering within a document.
	    order = models.PositiveIntegerField(default=0, db_index=True)
	    page_start = models.IntegerField(null=True, blank=True)
	    page_end = models.IntegerField(null=True, blank=True)
	    content = models.TextField()
	    excerpt = models.TextField(blank=True)
	    metadata = models.JSONField(default=dict, blank=True)
	    is_ocr = models.BooleanField(default=False)
	    # PostgreSQL full-text search vector, covered by the GIN index below.
	    tsv_body = SearchVectorField(null=True, editable=False)
	    # Raw bytes; presumably a serialized embedding vector - confirm format.
	    embedding = models.BinaryField(null=True, blank=True, editable=False)

	    class Meta:
	        indexes = [
	            GinIndex(fields=["tsv_body"], name="legal_section_tsv_idx"),
	            models.Index(fields=["document", "order"]),
	            models.Index(fields=["level"]),
	        ]
	        ordering = ["document", "order"]
	        unique_together = ("document", "section_code", "order")

	    def __str__(self) -> str:
	        """Return the section code plus the parent document's primary key.

	        ``document_id`` is used instead of ``document.code`` so rendering a
	        list of sections does not trigger one extra query per row.
	        """
	        return f"{self.section_code} (document {self.document_id})"

	    def search_vector(self) -> str:
	        """Return the searchable text for this section.

	        Joins the non-empty values of section_title, section_code, content
	        and excerpt with single spaces. The result is a plain string.
	        """
	        fields = [
	            self.section_title,
	            self.section_code,
	            self.content,
	            self.excerpt,
	        ]
	        return " ".join(str(f) for f in fields if f)


	class Synonym(models.Model):
	    """Maps a unique keyword to one alias string."""

	    # unique=True means each keyword can carry exactly one alias row.
	    keyword = models.CharField(max_length=120, unique=True)
	    alias = models.CharField(max_length=120)

	    def __str__(self) -> str:
	        """Return ``"<keyword> -> <alias>"`` for admin lists and debugging."""
	        return f"{self.keyword} -> {self.alias}"


	class LegalDocumentImage(models.Model):
	    """Metadata for images extracted from uploaded legal documents."""

	    # Deleting the parent document deletes its image rows (CASCADE).
	    document = models.ForeignKey(
	        LegalDocument,
	        on_delete=models.CASCADE,
	        related_name="images",
	    )
	    # Stored under legal_images/<code>/ via legal_document_image_upload_path.
	    image = models.ImageField(upload_to=legal_document_image_upload_path)
	    page_number = models.IntegerField(null=True, blank=True)
	    description = models.CharField(max_length=255, blank=True)
	    width = models.IntegerField(null=True, blank=True)
	    height = models.IntegerField(null=True, blank=True)
	    checksum = models.CharField(max_length=128, blank=True)
	    created_at = models.DateTimeField(auto_now_add=True)

	    class Meta:
	        indexes = [
	            models.Index(fields=["document", "page_number"]),
	            models.Index(fields=["checksum"]),
	        ]

	    def __str__(self) -> str:
	        """Describe the image by its id and the parent document's code."""
	        # Reads self.document, so this may load the related document.
	        return "Image {} of {}".format(self.id, self.document.code)


	class IngestionJob(models.Model):
	    """Background ingestion task information."""

	    # Allowed values for ``status``; exposed as constants for callers.
	    STATUS_PENDING = "pending"
	    STATUS_RUNNING = "running"
	    STATUS_COMPLETED = "completed"
	    STATUS_FAILED = "failed"

	    STATUS_CHOICES = [
	        (STATUS_PENDING, "Pending"),
	        (STATUS_RUNNING, "Running"),
	        (STATUS_COMPLETED, "Completed"),
	        (STATUS_FAILED, "Failed"),
	    ]

	    # UUID primary key generated client-side via uuid.uuid4.
	    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
	    code = models.CharField(max_length=128)
	    filename = models.CharField(max_length=255)
	    # SET_NULL keeps the job row when its document is deleted.
	    document = models.ForeignKey(
	        LegalDocument,
	        related_name="ingestion_jobs",
	        on_delete=models.SET_NULL,
	        null=True,
	        blank=True,
	    )
	    metadata = models.JSONField(default=dict, blank=True)
	    stats = models.JSONField(default=dict, blank=True)
	    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default=STATUS_PENDING)
	    error_message = models.TextField(blank=True)
	    storage_path = models.CharField(max_length=512, blank=True)
	    # Non-negative integer; presumably a percentage - no upper bound is
	    # enforced here, confirm against the code that updates it.
	    progress = models.PositiveIntegerField(default=0)
	    created_at = models.DateTimeField(auto_now_add=True)
	    updated_at = models.DateTimeField(auto_now=True)
	    started_at = models.DateTimeField(null=True, blank=True)
	    finished_at = models.DateTimeField(null=True, blank=True)

	    class Meta:
	        # Newest jobs first.
	        ordering = ("-created_at",)

	    def __str__(self) -> str:  # pragma: no cover - trivial
	        """Return a short label containing the job's code and status."""
	        return "IngestionJob({}, {})".format(self.code, self.status)

	class AuditLog(models.Model):
	    """One logged request: client info, path, query, status and timing."""

	    created_at = models.DateTimeField(auto_now_add=True)
	    ip = models.GenericIPAddressField(null=True, blank=True)
	    user_agent = models.CharField(max_length=300, blank=True)
	    path = models.CharField(max_length=300)
	    # Capped at 500 characters; longer values must be truncated by the writer.
	    query = models.CharField(max_length=500, blank=True)
	    # Integer status, defaulting to 200; presumably the HTTP response code.
	    status = models.IntegerField(default=200)
	    intent = models.CharField(max_length=50, blank=True)
	    confidence = models.FloatField(null=True, blank=True)
	    latency_ms = models.FloatField(null=True, blank=True)

	    def __str__(self) -> str:
	        """Return ``"<status> <path>"`` for admin lists and debugging output."""
	        return f"{self.status} {self.path}"


	class MLMetrics(models.Model):
	    """Aggregated ML metrics, one row per calendar date."""

	    # unique=True enforces at most one metrics row per date.
	    date = models.DateField(unique=True)
	    total_requests = models.IntegerField(default=0)
	    intent_accuracy = models.FloatField(null=True, blank=True)
	    average_latency_ms = models.FloatField(null=True, blank=True)
	    error_rate = models.FloatField(null=True, blank=True)
	    intent_breakdown = models.JSONField(default=dict, blank=True)
	    # auto_now_add: set once on insert, not refreshed on later saves.
	    generated_at = models.DateTimeField(auto_now_add=True)

	    class Meta:
	        ordering = ["-date"]
	        verbose_name = "ML Metrics"
	        verbose_name_plural = "ML Metrics"

	    def __str__(self) -> str:
	        """Return a label naming the date these metrics cover."""
	        return f"ML metrics for {self.date}"


	class ConversationSession(models.Model):
	    """Model to store conversation sessions for context management."""

	    # Public session identifier, generated with uuid.uuid4 and kept unique.
	    session_id = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
	    # Optional free-form user identifier (a string, not a foreign key).
	    user_id = models.CharField(max_length=100, null=True, blank=True, db_index=True)
	    created_at = models.DateTimeField(auto_now_add=True)
	    updated_at = models.DateTimeField(auto_now=True)
	    metadata = models.JSONField(default=dict, blank=True)

	    class Meta:
	        # Most recently updated sessions first.
	        ordering = ["-updated_at"]
	        verbose_name = "Conversation Session"
	        verbose_name_plural = "Conversation Sessions"
	        # NOTE(review): session_id is already unique=True, so the explicit
	        # index on it may be redundant - confirm before changing, since
	        # removing it requires a migration.
	        indexes = [
	            models.Index(fields=["session_id"]),
	            models.Index(fields=["user_id", "-updated_at"]),
	        ]

	    def __str__(self):
	        """Return a label containing the session's UUID."""
	        return "Session {}".format(self.session_id)


	class ConversationMessage(models.Model):
	    """Model to store individual messages in a conversation session."""

	    ROLE_CHOICES = [
	        ("user", "User"),
	        ("bot", "Bot"),
	    ]

	    # Deleting a session deletes its messages (CASCADE).
	    session = models.ForeignKey(
	        ConversationSession,
	        on_delete=models.CASCADE,
	        related_name="messages"
	    )
	    role = models.CharField(max_length=10, choices=ROLE_CHOICES)
	    content = models.TextField()
	    intent = models.CharField(max_length=50, blank=True, null=True)
	    entities = models.JSONField(default=dict, blank=True)
	    timestamp = models.DateTimeField(auto_now_add=True)
	    metadata = models.JSONField(default=dict, blank=True)

	    class Meta:
	        # Oldest message first.
	        ordering = ["timestamp"]
	        verbose_name = "Conversation Message"
	        verbose_name_plural = "Conversation Messages"
	        indexes = [
	            models.Index(fields=["session", "timestamp"]),
	            models.Index(fields=["session", "role", "timestamp"]),
	        ]

	    def __str__(self):
	        """Return ``"<role>: <preview>"`` with at most 50 characters of content.

	        The trailing ``...`` is appended only when the content was actually
	        cut, so short messages are shown verbatim.
	        """
	        preview = self.content[:50]
	        suffix = "..." if len(self.content) > 50 else ""
	        return f"{self.role}: {preview}{suffix}"