spinxxxx
/

git-issues-priority-ko

Text Classification

commit-priority

Model card Files Files and versions

git-issues-priority-ko / postprocess /to_priority.py

spinxxxx's picture

feat: add issue priority prediction model (score-based)

902efd1 30 days ago

history blame contribute delete

3.85 kB

	"""
	Score를 Priority 클래스로 변환하는 함수

	사용법:
	from postprocess.to_priority import to_priority

	# 기본 사용 (후처리 규칙 없음)
	priority = to_priority(score=0.82, text="로그인 에러")

	# 후처리 규칙 포함
	priority = to_priority(score=0.82, text="로그인 에러", use_rules=True)
	"""

	import json
	import os
	from typing import Optional
	import yaml


	def _load_thresholds(thresholds_path: str) -> tuple:
		"""Read the threshold JSON file and return ``(q_low, q_high)``.

		Either key falls back to 0.0 when it is absent from the file.

		Raises:
			FileNotFoundError: if ``thresholds_path`` does not exist.
		"""
		if not os.path.exists(thresholds_path):
			raise FileNotFoundError(f"Threshold 파일을 찾을 수 없습니다: {thresholds_path}")

		with open(thresholds_path, "r", encoding="utf-8") as f:
			thresholds = json.load(f)

		return thresholds.get("q_low", 0.0), thresholds.get("q_high", 0.0)


	def _normalize_keywords(raw) -> list:
		"""Return the keywords in ``raw`` as a list of non-empty lowercase strings."""
		if not raw:
			# Key missing, or present with a null/empty value in the YAML file.
			return []
		if isinstance(raw, str):
			# A single scalar keyword instead of a list.
			raw = [raw]
		# Keywords are compared against lowercased text, so they must be
		# lowercased too; str() also accepts YAML scalars such as 500.
		lowered = [str(kw).lower() for kw in raw if kw is not None]
		# An empty keyword would be a substring of every text; drop it.
		return [kw for kw in lowered if kw]


	def _load_keyword_rules(rules_path: Optional[str]) -> tuple:
		"""Load the keyword rule file and return normalized keyword lists.

		Returns a ``(low_forced, high_boost, min_med)`` tuple of keyword lists.
		When the rule file does not exist, all three lists are empty so that
		only the thresholds decide the priority.
		"""
		if rules_path is None:
			# Default location: priority_rules.yaml next to this module.
			rules_path = os.path.join(os.path.dirname(__file__), "priority_rules.yaml")

		rules = {}
		if os.path.exists(rules_path):
			with open(rules_path, "r", encoding="utf-8") as f:
				# safe_load returns None for an empty document.
				rules = yaml.safe_load(f) or {}

		return (
			_normalize_keywords(rules.get("low_forced_keywords")),
			_normalize_keywords(rules.get("high_boost_keywords")),
			_normalize_keywords(rules.get("min_med_keywords")),
		)


	def _classify(score: float, text: str, q_low: float, q_high: float, keyword_rules: Optional[tuple]) -> str:
		"""Map one score (and optionally its text) to "HIGH", "MED" or "LOW"."""
		if keyword_rules is not None:
			low_forced, high_boost, min_med = keyword_rules
			text_lower = (text or "").lower()

			# 1. Forced-LOW keywords take precedence over everything else.
			if any(kw in text_lower for kw in low_forced):
				return "LOW"

			# 2. HIGH-boost keywords.
			if any(kw in text_lower for kw in high_boost):
				return "HIGH"

			# 3. Minimum-MED keywords only lift scores that would otherwise be LOW.
			if score <= q_low and any(kw in text_lower for kw in min_med):
				return "MED"

		# Default threshold-based conversion; the HIGH test runs first.
		if score >= q_high:
			return "HIGH"
		if score <= q_low:
			return "LOW"
		return "MED"


	def to_priority(
		score: float,
		text: str = "",
		thresholds_path: str = "score_thresholds.json",
		rules_path: Optional[str] = None,
		use_rules: bool = False
	) -> str:
		"""Convert a score into a HIGH/MED/LOW priority.

		The threshold file (and the rule file when ``use_rules`` is set) is read
		on every call; use ``to_priority_batch`` to convert many scores with a
		single read.

		Args:
			score: Score predicted by the model (original scale).
			text: Issue/commit text (needed when keyword rules are used).
			thresholds_path: Path to the score_thresholds.json file.
			rules_path: Path to the priority_rules.yaml file. When None, the file
				is looked up next to this module.
			use_rules: Whether to apply the keyword-based post-processing rules.
				Keywords are matched case-insensitively as substrings of ``text``.
				A missing rule file is ignored.

		Returns:
			"HIGH", "MED" or "LOW".

		Raises:
			FileNotFoundError: if ``thresholds_path`` does not exist.
		"""
		q_low, q_high = _load_thresholds(thresholds_path)
		keyword_rules = _load_keyword_rules(rules_path) if use_rules else None
		return _classify(score, text, q_low, q_high, keyword_rules)


	def to_priority_batch(
		scores: list,
		texts: Optional[list] = None,
		thresholds_path: str = "score_thresholds.json",
		rules_path: Optional[str] = None,
		use_rules: bool = False
	) -> list:
		"""Convert a batch of scores into priorities.

		The threshold and rule files are loaded once for the whole batch.

		Args:
			scores: List of scores.
			texts: List of texts, one per score (needed when keyword rules are
				used). When None, every text is treated as empty.
			thresholds_path: Path to the score_thresholds.json file.
			rules_path: Path to the priority_rules.yaml file.
			use_rules: Whether to apply the keyword-based post-processing rules.

		Returns:
			List of priorities, e.g. ["HIGH", "MED", "LOW", ...], one per score.

		Raises:
			ValueError: if ``texts`` is given and its length differs from ``scores``.
			FileNotFoundError: if ``thresholds_path`` does not exist and there is
				at least one score to convert.
		"""
		scores = list(scores)
		if texts is None:
			texts = [""] * len(scores)
		else:
			texts = list(texts)
			if len(texts) != len(scores):
				# zip() would silently drop the unmatched tail.
				raise ValueError(
					f"scores와 texts의 길이가 다릅니다: {len(scores)} != {len(texts)}"
				)

		if not scores:
			# Nothing to convert, so no file needs to be read.
			return []

		q_low, q_high = _load_thresholds(thresholds_path)
		keyword_rules = _load_keyword_rules(rules_path) if use_rules else None

		return [
			_classify(score, text, q_low, q_high, keyword_rules)
			for score, text in zip(scores, texts)
		]