Spaces:

alwaysgood
/

my-tide-env

Sleeping

my-tide-env / performance_api.py

SeungHyeok Jang

change the pipeline logic

47b6af0 22 days ago

24.6 kB

	"""
	Real-time Performance Evaluation API
	실시간 예측 성능 평가 및 모니터링 API
	"""

	from fastapi import FastAPI, HTTPException, Header, Query
	from datetime import datetime, timedelta
	import numpy as np
	import pandas as pd
	from typing import Optional, List, Dict, Any
	import logging
	from config import INTERNAL_API_KEY

	logger = logging.getLogger(__name__)

	# 성능 데이터 저장소 (실제로는 데이터베이스 사용)
	performance_cache = {
	"realtime_metrics": {},
	"historical_data": [],
	"station_performance": {},
	"alert_thresholds": {
	"rmse_warning": 30.0,
	"rmse_critical": 50.0,
	"accuracy_warning": 80.0,
	"accuracy_critical": 70.0
	}
	}

	def verify_internal_api_key(authorization: str = Header(None)):
	"""내부 API 키 검증"""
	if authorization and authorization.startswith("Bearer "):
	return authorization == f"Bearer {INTERNAL_API_KEY}"
	return False

	def register_performance_routes(app: FastAPI):
	"""성능 평가 API 라우트 등록"""

	@app.get("/api/performance/realtime", tags=["Performance"])
	async def get_realtime_performance(
	station_id: Optional[str] = Query(None, description="특정 관측소 성능 (전체: None)"),
	authorization: str = Header(None)
	):
	"""실시간 예측 성능 지표 조회"""

	# 내부 API는 인증 필요, 외부는 읽기 전용
	is_internal = verify_internal_api_key(authorization)

	try:
	current_time = datetime.now()

	if station_id:
	# 특정 관측소 성능
	station_metrics = await get_station_performance(station_id)
	return {
	"timestamp": current_time.isoformat(),
	"station_id": station_id,
	**station_metrics,
	"data_source": "realtime"
	}
	else:
	# 전체 시스템 성능
	overall_metrics = await get_overall_performance()

	response_data = {
	"timestamp": current_time.isoformat(),
	"rmse": overall_metrics["rmse"],
	"mae": overall_metrics["mae"],
	"accuracy": overall_metrics["accuracy"],
	"prediction_count": overall_metrics["prediction_count"],
	"active_stations": overall_metrics["active_stations"],
	"data_quality_score": overall_metrics["data_quality_score"],
	"status": overall_metrics["status"]
	}

	# 내부 요청시 추가 정보 제공
	if is_internal:
	response_data.update({
	"detailed_metrics": overall_metrics.get("detailed_metrics", {}),
	"station_breakdown": overall_metrics.get("station_breakdown", {}),
	"recent_alerts": overall_metrics.get("recent_alerts", [])
	})

	return response_data

	except Exception as e:
	logger.error(f"Realtime performance query failed: {str(e)}")
	raise HTTPException(status_code=500, detail=str(e))

	@app.get("/api/performance/historical", tags=["Performance"])
	async def get_historical_performance(
	hours: int = Query(24, description="조회할 시간 범위 (시간)"),
	station_id: Optional[str] = Query(None, description="특정 관측소"),
	metric: str = Query("rmse", description="성능 지표 (rmse/mae/accuracy)"),
	authorization: str = Header(None)
	):
	"""성능 히스토리 조회"""

	is_internal = verify_internal_api_key(authorization)

	try:
	# 시간 범위 검증
	if hours > 168: # 최대 1주일
	hours = 168

	end_time = datetime.now()
	start_time = end_time - timedelta(hours=hours)

	historical_data = await get_performance_history(
	start_time, end_time, station_id, metric
	)

	# 통계 계산
	if historical_data:
	values = [item[metric] for item in historical_data if item.get(metric) is not None]

	if values:
	statistics = {
	"mean": round(np.mean(values), 2),
	"std": round(np.std(values), 2),
	"min": round(min(values), 2),
	"max": round(max(values), 2),
	"trend": calculate_trend(values)
	}
	else:
	statistics = None
	else:
	statistics = None

	return {
	"timestamp": end_time.isoformat(),
	"query_range": {
	"start_time": start_time.isoformat(),
	"end_time": end_time.isoformat(),
	"hours": hours
	},
	"station_id": station_id,
	"metric": metric,
	"data_points": len(historical_data),
	"data": historical_data[-100:] if not is_internal else historical_data, # 외부는 최근 100개만
	"statistics": statistics
	}

	except Exception as e:
	logger.error(f"Historical performance query failed: {str(e)}")
	raise HTTPException(status_code=500, detail=str(e))

	@app.get("/api/performance/comparison", tags=["Performance"])
	async def compare_station_performance(
	station_ids: str = Query(..., description="비교할 관측소들 (쉼표 구분)"),
	metric: str = Query("rmse", description="비교할 성능 지표"),
	period: str = Query("24h", description="비교 기간 (1h/6h/24h/7d)"),
	authorization: str = Header(None)
	):
	"""관측소별 성능 비교"""

	is_internal = verify_internal_api_key(authorization)

	try:
	# 관측소 목록 파싱
	stations = [s.strip() for s in station_ids.split(",")]

	if len(stations) > 10: # 최대 10개 관측소
	stations = stations[:10]

	# 기간 파싱
	period_hours = {
	"1h": 1, "6h": 6, "24h": 24, "7d": 168
	}.get(period, 24)

	comparison_data = {}

	for station_id in stations:
	station_metrics = await get_station_performance_summary(
	station_id, period_hours, metric
	)
	comparison_data[station_id] = station_metrics

	# 순위 계산
	if metric in ["rmse", "mae"]:
	# 낮을수록 좋음
	sorted_stations = sorted(
	comparison_data.items(),
	key=lambda x: x[1].get("current_value", float('inf'))
	)
	else: # accuracy 등
	# 높을수록 좋음
	sorted_stations = sorted(
	comparison_data.items(),
	key=lambda x: x[1].get("current_value", 0),
	reverse=True
	)

	return {
	"timestamp": datetime.now().isoformat(),
	"metric": metric,
	"period": period,
	"stations_count": len(stations),
	"comparison": comparison_data,
	"ranking": [{"rank": i+1, "station_id": station, "value": data["current_value"]}
	for i, (station, data) in enumerate(sorted_stations)],
	"best_performer": sorted_stations[0][0] if sorted_stations else None,
	"summary": {
	"best_value": sorted_stations[0][1]["current_value"] if sorted_stations else None,
	"worst_value": sorted_stations[-1][1]["current_value"] if sorted_stations else None,
	"average_value": round(np.mean([data["current_value"] for data in comparison_data.values()]), 2)
	}
	}

	except Exception as e:
	logger.error(f"Station comparison failed: {str(e)}")
	raise HTTPException(status_code=500, detail=str(e))

	@app.get("/api/performance/alerts", tags=["Performance"])
	async def get_performance_alerts(
	active_only: bool = Query(True, description="활성 알림만 조회"),
	hours: int = Query(24, description="조회할 시간 범위"),
	authorization: str = Header(None)
	):
	"""성능 알림 조회"""

	verify_internal_api_key(authorization) # 내부 API만 접근 가능

	try:
	alerts = await get_current_alerts(active_only, hours)

	# 알림 분류
	critical_alerts = [a for a in alerts if a["severity"] == "critical"]
	warning_alerts = [a for a in alerts if a["severity"] == "warning"]

	return {
	"timestamp": datetime.now().isoformat(),
	"query_range_hours": hours,
	"active_only": active_only,
	"total_alerts": len(alerts),
	"critical_count": len(critical_alerts),
	"warning_count": len(warning_alerts),
	"alerts": alerts,
	"summary": {
	"system_status": "critical" if critical_alerts else ("warning" if warning_alerts else "normal"),
	"requires_attention": len(critical_alerts) > 0,
	"most_recent": alerts[0] if alerts else None
	}
	}

	except Exception as e:
	logger.error(f"Performance alerts query failed: {str(e)}")
	raise HTTPException(status_code=500, detail=str(e))

	@app.post("/api/performance/update", tags=["Performance"])
	async def update_performance_metrics(
	request_data: Dict[str, Any],
	authorization: str = Header(None)
	):
	"""성능 지표 업데이트 (내부 사용)"""

	verify_internal_api_key(authorization) # 내부 API만 접근 가능

	try:
	station_id = request_data.get("station_id")
	predictions = request_data.get("predictions", [])
	actual_values = request_data.get("actual_values", [])
	timestamp = request_data.get("timestamp", datetime.now().isoformat())

	if not predictions or not actual_values:
	raise HTTPException(status_code=400, detail="Predictions and actual values required")

	if len(predictions) != len(actual_values):
	raise HTTPException(status_code=400, detail="Predictions and actual values must have same length")

	# 성능 지표 계산
	metrics = calculate_performance_metrics(predictions, actual_values)

	# 성능 데이터 저장
	performance_record = {
	"station_id": station_id,
	"timestamp": timestamp,
	"predictions": predictions,
	"actual_values": actual_values,
	"metrics": metrics,
	"data_points": len(predictions)
	}

	await save_performance_record(performance_record)

	# 실시간 캐시 업데이트
	performance_cache["realtime_metrics"][station_id] = metrics
	performance_cache["historical_data"].append(performance_record)

	# 오래된 데이터 정리 (메모리 관리)
	if len(performance_cache["historical_data"]) > 1000:
	performance_cache["historical_data"] = performance_cache["historical_data"][-500:]

	# 알림 체크
	alerts = check_performance_alerts(station_id, metrics)

	return {
	"success": True,
	"timestamp": datetime.now().isoformat(),
	"station_id": station_id,
	"metrics": metrics,
	"data_points": len(predictions),
	"alerts_triggered": len(alerts),
	"alerts": alerts
	}

	except Exception as e:
	logger.error(f"Performance update failed: {str(e)}")
	raise HTTPException(status_code=500, detail=str(e))

	# ============================================================================
	# 성능 계산 및 분석 함수들
	# ============================================================================

	async def get_overall_performance():
	"""전체 시스템 성능 조회"""

	# 시연 모드에서는 시뮬레이션 데이터 사용
	from internal_api import demo_session, active_issues

	base_rmse = 18.5
	base_mae = 14.2
	base_accuracy = 89.2

	# 활성 문제들의 영향 계산
	rmse_multiplier = 1.0
	accuracy_penalty = 0

	for issue in active_issues.values():
	if issue["type"] == "extreme_weather":
	rmse_multiplier *= 1.8
	accuracy_penalty += 15
	elif issue["type"] == "sensor_malfunction":
	rmse_multiplier *= 1.3
	accuracy_penalty += 8
	elif issue["type"] == "data_corruption":
	rmse_multiplier *= 1.5
	accuracy_penalty += 12
	elif issue["type"] == "network_failure":
	rmse_multiplier *= 1.2
	accuracy_penalty += 5

	# 랜덤 변동 추가
	rmse_variation = np.random.normal(0, 2)
	accuracy_variation = np.random.normal(0, 3)

	current_rmse = max(10, base_rmse * rmse_multiplier + rmse_variation)
	current_mae = max(8, base_mae * rmse_multiplier * 0.8 + rmse_variation * 0.7)
	current_accuracy = max(50, min(98, base_accuracy - accuracy_penalty + accuracy_variation))

	# 데이터 품질 점수
	data_quality = 95 - len(active_issues) * 10

	# 상태 결정
	if current_rmse > 40 or current_accuracy < 75:
	status = "critical"
	elif current_rmse > 25 or current_accuracy < 85:
	status = "warning"
	else:
	status = "good"

	return {
	"rmse": round(current_rmse, 1),
	"mae": round(current_mae, 1),
	"accuracy": round(current_accuracy, 1),
	"prediction_count": demo_session.get("total_processed", 0),
	"active_stations": len(demo_session.get("active_stations", [])),
	"data_quality_score": round(data_quality, 1),
	"status": status,
	"detailed_metrics": {
	"rmse_trend": "increasing" if len(active_issues) > 0 else "stable",
	"prediction_latency_ms": np.random.randint(50, 200),
	"data_freshness_minutes": np.random.randint(1, 10)
	},
	"station_breakdown": await get_all_stations_summary(),
	"recent_alerts": await get_recent_alerts_summary()
	}

	async def get_station_performance(station_id: str):
	"""특정 관측소 성능 조회"""

	# 기본 성능 + 관측소별 변동
	station_seed = hash(station_id) % 1000
	np.random.seed(station_seed)

	base_rmse = 18.5 + np.random.normal(0, 3)
	base_mae = 14.2 + np.random.normal(0, 2)
	base_accuracy = 89.2 + np.random.normal(0, 5)

	# 활성 문제 영향
	from internal_api import active_issues

	for issue in active_issues.values():
	if issue.get("station_id") == station_id or not issue.get("station_id"):
	if issue["type"] == "sensor_malfunction" and issue.get("station_id") == station_id:
	base_rmse *= 2.0
	base_accuracy -= 25
	elif issue["type"] == "extreme_weather":
	base_rmse *= 1.6
	base_accuracy -= 12

	return {
	"rmse": round(max(10, base_rmse), 1),
	"mae": round(max(8, base_mae), 1),
	"accuracy": round(max(50, min(98, base_accuracy)), 1),
	"data_points": np.random.randint(50, 200),
	"last_prediction": (datetime.now() - timedelta(minutes=np.random.randint(1, 15))).isoformat(),
	"status": "active" if station_id in demo_session.get("active_stations", []) else "inactive"
	}

	async def get_performance_history(start_time, end_time, station_id, metric):
	"""성능 히스토리 생성 (시뮬레이션)"""

	data_points = []
	current_time = start_time

	# 1시간 간격으로 데이터 생성
	while current_time <= end_time:

	# 시간대별 기본값
	hour = current_time.hour
	base_value = {
	"rmse": 18.5 + 5 * np.sin(2 * np.pi * hour / 24),
	"mae": 14.2 + 3 * np.sin(2 * np.pi * hour / 24),
	"accuracy": 89.2 - 8 * np.sin(2 * np.pi * hour / 24)
	}

	# 랜덤 변동
	variation = np.random.normal(0, 2)
	value = base_value[metric] + variation

	# 관측소별 조정
	if station_id:
	station_offset = hash(station_id) % 10 - 5
	value += station_offset

	data_points.append({
	"timestamp": current_time.isoformat(),
	"station_id": station_id,
	metric: round(value, 1),
	"data_points": np.random.randint(10, 50)
	})

	current_time += timedelta(hours=1)

	return data_points

	async def get_station_performance_summary(station_id, period_hours, metric):
	"""관측소 성능 요약"""

	# 현재 성능
	current_perf = await get_station_performance(station_id)
	current_value = current_perf[metric]

	# 기간별 평균 (시뮬레이션)
	period_variation = np.random.normal(0, 5)
	period_average = current_value + period_variation

	# 트렌드 계산
	trend_change = np.random.uniform(-10, 10)
	trend = "improving" if trend_change < -2 else ("degrading" if trend_change > 2 else "stable")

	return {
	"station_id": station_id,
	"current_value": current_value,
	"period_average": round(period_average, 1),
	"trend": trend,
	"trend_change": round(trend_change, 1),
	"data_points": np.random.randint(20, 100),
	"last_update": datetime.now().isoformat()
	}

	def calculate_performance_metrics(predictions, actual_values):
	"""성능 지표 계산"""

	predictions = np.array(predictions)
	actual_values = np.array(actual_values)

	# RMSE
	rmse = np.sqrt(np.mean((predictions - actual_values) ** 2))

	# MAE
	mae = np.mean(np.abs(predictions - actual_values))

	# 정확도 (95% 신뢰구간 내 예측 비율)
	errors = np.abs(predictions - actual_values)
	threshold = np.percentile(errors, 95)
	accuracy = np.mean(errors <= threshold) * 100

	# 추가 지표
	mape = np.mean(np.abs((actual_values - predictions) / actual_values)) * 100
	r2 = 1 - (np.sum((actual_values - predictions) 2) / np.sum((actual_values - np.mean(actual_values)) 2))

	return {
	"rmse": round(rmse, 2),
	"mae": round(mae, 2),
	"accuracy": round(accuracy, 1),
	"mape": round(mape, 2),
	"r2_score": round(r2, 3),
	"data_points": len(predictions)
	}

	def calculate_trend(values):
	"""트렌드 계산"""
	if len(values) < 3:
	return "insufficient_data"

	# 선형 회귀로 기울기 계산
	x = np.arange(len(values))
	slope = np.polyfit(x, values, 1)[0]

	if slope > 0.5:
	return "increasing"
	elif slope < -0.5:
	return "decreasing"
	else:
	return "stable"

	def check_performance_alerts(station_id, metrics):
	"""성능 알림 체크"""

	alerts = []
	thresholds = performance_cache["alert_thresholds"]

	# RMSE 체크
	rmse = metrics["rmse"]
	if rmse > thresholds["rmse_critical"]:
	alerts.append({
	"type": "rmse_critical",
	"severity": "critical",
	"message": f"Critical RMSE level: {rmse} cm",
	"station_id": station_id,
	"threshold": thresholds["rmse_critical"],
	"current_value": rmse,
	"timestamp": datetime.now().isoformat()
	})
	elif rmse > thresholds["rmse_warning"]:
	alerts.append({
	"type": "rmse_warning",
	"severity": "warning",
	"message": f"High RMSE level: {rmse} cm",
	"station_id": station_id,
	"threshold": thresholds["rmse_warning"],
	"current_value": rmse,
	"timestamp": datetime.now().isoformat()
	})

	# 정확도 체크
	accuracy = metrics["accuracy"]
	if accuracy < thresholds["accuracy_critical"]:
	alerts.append({
	"type": "accuracy_critical",
	"severity": "critical",
	"message": f"Critical accuracy drop: {accuracy}%",
	"station_id": station_id,
	"threshold": thresholds["accuracy_critical"],
	"current_value": accuracy,
	"timestamp": datetime.now().isoformat()
	})
	elif accuracy < thresholds["accuracy_warning"]:
	alerts.append({
	"type": "accuracy_warning",
	"severity": "warning",
	"message": f"Low accuracy: {accuracy}%",
	"station_id": station_id,
	"threshold": thresholds["accuracy_warning"],
	"current_value": accuracy,
	"timestamp": datetime.now().isoformat()
	})

	return alerts

	async def get_current_alerts(active_only, hours):
	"""현재 알림 조회 (시뮬레이션)"""

	from internal_api import active_issues, demo_session

	alerts = []

	# 활성 문제들을 알림으로 변환
	for issue_id, issue in active_issues.items():
	alerts.append({
	"id": issue_id,
	"type": f"{issue['type']}_alert",
	"severity": "critical" if issue["type"] in ["extreme_weather", "sensor_malfunction"] else "warning",
	"message": f"{issue['type'].replace('_', ' ').title()} detected",
	"station_id": issue.get("station_id"),
	"timestamp": issue["start_time"].isoformat(),
	"duration_minutes": (datetime.now() - issue["start_time"]).total_seconds() / 60,
	"active": True
	})

	# 추가 성능 알림 시뮬레이션
	if demo_session.get("active"):
	current_perf = await get_overall_performance()

	if current_perf["rmse"] > 30:
	alerts.append({
	"id": "perf_rmse_high",
	"type": "performance_degradation",
	"severity": "warning",
	"message": f"High system RMSE: {current_perf['rmse']} cm",
	"timestamp": (datetime.now() - timedelta(minutes=5)).isoformat(),
	"duration_minutes": 5,
	"active": True
	})

	# 정렬 (최신순)
	alerts.sort(key=lambda x: x["timestamp"], reverse=True)

	return alerts

	async def get_all_stations_summary():
	"""모든 관측소 성능 요약"""

	from internal_api import demo_session
	stations = demo_session.get("active_stations", ["DT_0001", "DT_0002"])

	summary = {}
	for station_id in stations:
	perf = await get_station_performance(station_id)
	summary[station_id] = {
	"rmse": perf["rmse"],
	"accuracy": perf["accuracy"],
	"status": perf["status"]
	}

	return summary

	async def get_recent_alerts_summary():
	"""최근 알림 요약"""

	alerts = await get_current_alerts(True, 1) # 최근 1시간

	return {
	"total_count": len(alerts),
	"critical_count": sum(1 for a in alerts if a["severity"] == "critical"),
	"most_recent": alerts[0] if alerts else None
	}

	async def save_performance_record(performance_record):
	"""성능 기록 저장 (실제로는 데이터베이스 사용)"""

	# 여기서는 메모리에만 저장 (시뮬레이션)
	performance_cache["historical_data"].append(performance_record)

	# 실제 구현에서는 Supabase나 다른 DB에 저장
	# await supabase.table("performance_metrics").insert(performance_record)

	pass

	logger.info("Performance API module loaded successfully")