Spaces:

alwaysgood
/

my-tide-env

Sleeping

App Files Files Community

my-tide-env / noise_scenarios.py

alwaysgood

Update noise_scenarios.py

ea729fc verified 16 days ago

raw

history blame contribute delete

20.2 kB

	# noise_scenarios.py - 노이즈 시나리오 생성 엔진

	import numpy as np
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import random

	def generate_typhoon_scenario(df, intensity=1.0):
	    """
	    Return a copy of ``df`` with a typhoon-like disturbance injected.

	    A window of rows is perturbed. With at least 72 rows the window is
	    centred at 70% of the frame and spans ``min(int(24 * intensity), n // 3)``
	    rows; otherwise it is centred at the midpoint and spans
	    ``min(int(12 * intensity), n // 2)`` rows. Inside the window the effect
	    strength is ``(1 - distance_from_center) * intensity``, i.e. largest at
	    the centre and zero at the window edge.

	    Columns touched when present (others are ignored):
	        air_pres   += -80 * strength * (1 + N(0, 0.3))
	        wind_speed +=  35 * strength * (1 + N(0, 0.4))
	        wind_dir    = (wind_dir + 270 * strength * sin(2*pi*distance)) % 360
	        tide_level += 120 * strength * (1 + N(0, 0.5))
	        air_temp   += -12 * strength * (1 + N(0, 0.4))

	    Rows are addressed by position, so the result is correct for any index
	    (non-zero-based, datetime, or with duplicate labels).

	    Args:
	        df: Input DataFrame; it is not modified.
	        intensity: Scale factor; the original notes give 0.5 (weak) to
	            2.0 (very strong) as the intended range.

	    Returns:
	        A new DataFrame. When the window is non-empty, the touched columns
	        are returned as float64.
	    """
	    print(f"🌀 태풍 시나리오 생성 (강도: {intensity}, 데이터: {len(df)}개)")

	    df_noisy = df.copy()
	    n_points = len(df)

	    if n_points >= 72:
	        # Enough data: place the typhoon in the later part of the series.
	        typhoon_center = int(n_points * 0.7)
	        typhoon_duration = min(int(24 * intensity), n_points // 3)
	    else:
	        # Short series: place it in the middle.
	        typhoon_center = n_points // 2
	        typhoon_duration = min(int(12 * intensity), n_points // 2)

	    half_width = typhoon_duration // 2
	    start_idx = max(0, typhoon_center - half_width)
	    end_idx = min(n_points, typhoon_center + half_width)

	    print(f" 🌪️ 태풍 구간: {start_idx}-{end_idx} ({end_idx-start_idx}개 포인트)")

	    if end_idx <= start_idx:
	        # Empty window (tiny frame or very small/negative intensity).
	        return df_noisy

	    # Work on positional float buffers instead of label-based ``.at``: labels
	    # need not be unique, and integer columns cannot hold the float offsets.
	    affected = ('air_pres', 'wind_speed', 'wind_dir', 'tide_level', 'air_temp')
	    buffers = {
	        name: df_noisy[name].astype(float).to_numpy(copy=True)
	        for name in affected
	        if name in df_noisy.columns
	    }

	    for pos in range(start_idx, end_idx):
	        # Normalised distance from the typhoon centre (0 at the centre).
	        if typhoon_duration <= 2:
	            distance_from_center = 0  # very short window: uniform strength
	        else:
	            distance_from_center = abs(pos - typhoon_center) / max(1, half_width)

	        typhoon_strength = (1 - distance_from_center) * intensity

	        # The random draws below are made in the same per-row order as before
	        # so that a seeded np.random stream yields the same noise.
	        if 'air_pres' in buffers:
	            buffers['air_pres'][pos] += -80 * typhoon_strength * (1 + np.random.normal(0, 0.3))

	        if 'wind_speed' in buffers:
	            buffers['wind_speed'][pos] += 35 * typhoon_strength * (1 + np.random.normal(0, 0.4))

	        if 'wind_dir' in buffers:
	            # Deterministic swing, wrapped back into [0, 360).
	            swing = 270 * typhoon_strength * np.sin(distance_from_center * np.pi * 2)
	            buffers['wind_dir'][pos] = (buffers['wind_dir'][pos] + swing) % 360

	        if 'tide_level' in buffers:
	            buffers['tide_level'][pos] += 120 * typhoon_strength * (1 + np.random.normal(0, 0.5))

	        if 'air_temp' in buffers:
	            buffers['air_temp'][pos] += -12 * typhoon_strength * (1 + np.random.normal(0, 0.4))

	    # Assigning a plain ndarray writes by position, independent of the index.
	    for name, values in buffers.items():
	        df_noisy[name] = values

	    return df_noisy

	def generate_sensor_malfunction_scenario(df, intensity=1.0):
	    """
	    Return a copy of ``df`` with simulated sensor faults.

	    ``int(n_rows * 0.05 * intensity)`` distinct rows (capped at the number of
	    rows) are picked at random. For each one a random numeric column (a
	    column named ``date`` is never used) and a random fault type are chosen:

	        spike   - the value is multiplied by ``1 + (+/-)5 * intensity``
	        stuck   - the value is repeated over the following
	                  ``randint(3, int(8 * intensity))`` rows
	        drift   - following rows are scaled by a factor growing linearly to
	                  ``1 + (+/-)2 * intensity`` over
	                  ``randint(10, int(30 * intensity))`` rows
	        outlier - the value is replaced by ``mean +/- 5 * intensity * std``
	                  of the column as it stands at that moment

	    All row access is positional, so any index is supported. A column that
	    receives a fault is converted to float64 first.

	    Args:
	        df: Input DataFrame; it is not modified.
	        intensity: Scale factor; the original notes give 0.5 (mild) to
	            2.0 (severe). Values outside that range no longer raise:
	            the random ranges and the sample size are clamped.

	    Returns:
	        A new DataFrame with the faults applied.
	    """
	    print(f"📡 센서 오작동 시나리오 생성 (강도: {intensity})")

	    df_noisy = df.copy()
	    n_points = len(df)

	    # Candidate columns never change inside the loop, so compute them once.
	    numeric_cols = [
	        col
	        for col in df_noisy.select_dtypes(include=[np.number]).columns.tolist()
	        if col != 'date'
	    ]

	    # random.sample rejects a negative count or one larger than the population.
	    malfunction_count = min(n_points, max(0, int(n_points * 0.05 * intensity)))
	    if malfunction_count == 0 or not numeric_cols:
	        return df_noisy

	    for idx in random.sample(range(n_points), malfunction_count):
	        malfunction_col = random.choice(numeric_cols)
	        malfunction_type = random.choice(['spike', 'stuck', 'drift', 'outlier'])

	        # Faults produce non-integer values; make sure the column can hold them.
	        if df_noisy[malfunction_col].dtype != np.float64:
	            df_noisy[malfunction_col] = df_noisy[malfunction_col].astype(np.float64)
	        col_pos = df_noisy.columns.get_loc(malfunction_col)

	        if malfunction_type == 'spike':
	            spike_magnitude = intensity * 5 * random.choice([-1, 1])
	            original_val = df_noisy.iloc[idx, col_pos]
	            df_noisy.iloc[idx, col_pos] = original_val * (1 + spike_magnitude)

	        elif malfunction_type == 'stuck':
	            # Upper bound is clamped so randint never sees an empty range.
	            stuck_duration = random.randint(3, max(3, int(8 * intensity)))
	            stuck_value = df_noisy.iloc[idx, col_pos]
	            last_idx = min(n_points - 1, idx + stuck_duration)
	            df_noisy.iloc[idx:last_idx + 1, col_pos] = stuck_value

	        elif malfunction_type == 'drift':
	            drift_duration = random.randint(10, max(10, int(30 * intensity)))
	            drift_magnitude = intensity * 2 * random.choice([-1, 1])
	            last_idx = min(n_points - 1, idx + drift_duration)
	            # Factor is 1 at the faulty row and moves linearly away from 1.
	            steps = np.arange(last_idx - idx + 1)
	            factors = 1 + steps / drift_duration * drift_magnitude
	            segment = df_noisy.iloc[idx:last_idx + 1, col_pos].to_numpy()
	            df_noisy.iloc[idx:last_idx + 1, col_pos] = segment * factors

	        elif malfunction_type == 'outlier':
	            col_std = df_noisy[malfunction_col].std()
	            col_mean = df_noisy[malfunction_col].mean()
	            outlier_val = col_mean + random.choice([-1, 1]) * col_std * 5 * intensity
	            df_noisy.iloc[idx, col_pos] = outlier_val

	    return df_noisy

	def generate_burst_missing_scenario(df, intensity=1.0):
	    """
	    Return a copy of ``df`` with one to three contiguous blocks set to NaN.

	    For every block a length, a start position and a random subset of the
	    numeric columns (a column named ``date`` is never used) are drawn, and
	    the rows ``start_idx`` up to but not including ``end_idx`` of those
	    columns are blanked.

	    Rows are selected by position. The previous label-based
	    ``.loc[start_idx:end_idx]`` blanked nothing when the index labels did not
	    happen to be 0..n-1 (for example after ``df.tail(144)``), and included
	    one row more than the range that is printed.

	    Args:
	        df: Input DataFrame; it is not modified.
	        intensity: Scale factor; the original notes give 0.5 (short gaps)
	            to 2.0 (long gaps). Block length is drawn from
	            ``int(6 * intensity)`` up to ``min(int(72 * intensity), n // 2)``
	            and at most ``int(3 * intensity)`` columns (at least one) are
	            blanked per block.

	    Returns:
	        A new DataFrame; blanked integer columns are upcast so they can
	        hold NaN.
	    """
	    print(f"❌ 연속 결측치 시나리오 생성 (강도: {intensity})")

	    df_noisy = df.copy()
	    n_points = len(df)

	    # Candidate columns do not depend on the block, so compute them once.
	    numeric_cols = [
	        col
	        for col in df_noisy.select_dtypes(include=[np.number]).columns.tolist()
	        if col != 'date'
	    ]

	    num_missing_blocks = random.randint(1, 3)

	    if n_points == 0 or not numeric_cols:
	        # Nothing that could be blanked.
	        return df_noisy

	    for _ in range(num_missing_blocks):
	        max_duration = min(int(72 * intensity), n_points // 2)
	        shortest = int(6 * intensity)
	        missing_duration = random.randint(shortest, max(shortest + 1, max_duration))
	        # Keep the block inside the frame, also for tiny frames or odd intensities.
	        missing_duration = max(0, min(missing_duration, n_points))
	        start_idx = random.randint(0, max(0, n_points - missing_duration))
	        end_idx = min(n_points, start_idx + missing_duration)

	        # At least one column, even when int(3 * intensity) rounds down to 0.
	        max_cols = max(1, min(len(numeric_cols), int(3 * intensity)))
	        missing_cols_count = random.randint(1, max_cols)
	        missing_cols = random.sample(numeric_cols, missing_cols_count)

	        print(f" 📍 {start_idx}-{end_idx} 구간에서 {missing_cols} 결측 처리")

	        # Positional row mask; Series.mask upcasts integer columns as needed.
	        block = np.zeros(n_points, dtype=bool)
	        block[start_idx:end_idx] = True
	        for col in missing_cols:
	            df_noisy[col] = df_noisy[col].mask(block)

	    return df_noisy

	def generate_extreme_weather_scenario(df, intensity=1.0):
	    """
	    Return a copy of ``df`` with one randomly chosen extreme-weather episode.

	    One of ``heatwave``, ``coldwave``, ``highpressure`` or ``lowpressure`` is
	    picked at random together with a window of rows. Across the window the
	    effect follows ``sin(pi * progress) * intensity`` (zero at the start,
	    largest in the middle). Columns touched when present:

	        heatwave:     air_temp +15, air_pres -10, wind_speed x(1 - 0.5e),
	                      tide_level +5
	        coldwave:     air_temp -20, air_pres +20, wind_speed +10,
	                      tide_level -3
	        highpressure: air_pres +25, wind_speed x(1 - 0.7e)
	        lowpressure:  air_pres -20, wind_speed +8, tide_level +15

	    where additive amounts are multiplied by the effect ``e``. The wind
	    damping factor is clamped at zero; without the clamp ``highpressure``
	    produced negative wind speeds for intensities above roughly 1.43.

	    Rows are addressed by position, so any index is supported.

	    Args:
	        df: Input DataFrame; it is not modified.
	        intensity: Scale factor; the original notes give 0.5 (moderate) to
	            2.0 (extreme).

	    Returns:
	        A new DataFrame; touched columns are returned as float64.
	    """
	    print(f"🌡️ 극한 기상 시나리오 생성 (강도: {intensity})")

	    df_noisy = df.copy()
	    n_points = len(df)

	    weather_type = random.choice(['heatwave', 'coldwave', 'highpressure', 'lowpressure'])

	    # Window length and position; never longer than half the frame when the
	    # frame is large enough to allow it.
	    max_duration = min(int(96 * intensity), n_points // 2)
	    shortest = int(24 * intensity)
	    duration = random.randint(shortest, max(shortest + 1, max_duration))
	    start_idx = random.randint(0, max(1, n_points - duration))
	    end_idx = min(n_points, start_idx + duration)

	    if end_idx <= start_idx:
	        # Empty window (empty frame or non-positive duration).
	        return df_noisy

	    # Effect per row of the window, peaking in the middle.
	    progress = np.arange(end_idx - start_idx) / max(1, duration)
	    effect_strength = np.sin(progress * np.pi) * intensity

	    # (column, mode, coefficient): 'add' shifts by coefficient * effect,
	    # 'damp' multiplies by (1 - coefficient * effect).
	    recipes = {
	        'heatwave': (
	            ('air_temp', 'add', 15),
	            ('air_pres', 'add', -10),
	            ('wind_speed', 'damp', 0.5),
	            ('tide_level', 'add', 5),
	        ),
	        'coldwave': (
	            ('air_temp', 'add', -20),
	            ('air_pres', 'add', 20),
	            ('wind_speed', 'add', 10),
	            ('tide_level', 'add', -3),
	        ),
	        'highpressure': (
	            ('air_pres', 'add', 25),
	            ('wind_speed', 'damp', 0.7),
	        ),
	        'lowpressure': (
	            ('air_pres', 'add', -20),
	            ('wind_speed', 'add', 8),
	            ('tide_level', 'add', 15),
	        ),
	    }

	    for column, mode, coefficient in recipes[weather_type]:
	        if column not in df_noisy.columns:
	            continue

	        values = df_noisy[column].astype(float).to_numpy(copy=True)
	        window = values[start_idx:end_idx]  # view: edits land in ``values``
	        if mode == 'add':
	            window += coefficient * effect_strength
	        else:
	            # Damping can reduce the wind to calm but must not flip its sign.
	            window *= np.clip(1 - coefficient * effect_strength, 0, None)

	        # Plain ndarray assignment writes by position, independent of the index.
	        df_noisy[column] = values

	    return df_noisy

	def create_noise_comparison_plot(df_original, df_noisy, scenario_name):
	    """
	    Build a 3x2 subplot figure comparing original and noisy data.

	    The last ``len(df_noisy)`` rows of ``df_original`` are compared with
	    ``df_noisy`` by position (both indexes are reset). Panels one to five
	    show ``tide_level``, ``air_pres``, ``wind_speed``, ``air_temp`` and
	    ``wind_dir`` when the column exists in both frames; the sixth panel
	    repeats the tide level with thicker lines.

	    Each panel's y-axis covers the combined minimum and maximum of both
	    series plus 10% padding, so one-sided spikes are not cut off.

	    For ``scenario_name == 'typhoon'`` the perturbed interval is shaded in
	    every panel. The interval is taken from the rows where the two frames
	    actually differ; if no difference is found, the nominal typhoon window
	    for intensity 1 is shaded instead.

	    Plotting problems are printed and skipped rather than raised, so a
	    partially filled figure is still returned.

	    Args:
	        df_original: DataFrame holding the unmodified data.
	        df_noisy: DataFrame holding the perturbed data.
	        scenario_name: Scenario label; used in the title and to decide
	            whether the typhoon interval is shaded.

	    Returns:
	        The figure created by ``make_subplots``.
	    """
	    fig = make_subplots(
	        rows=3, cols=2,
	        subplot_titles=['🌊 조위 (tide_level)', '🌬️ 기압 (air_pres)',
	                        '💨 풍속 (wind_speed)', '🌡️ 기온 (air_temp)',
	                        '🧭 풍향 (wind_dir)', '📊 조위 전체 비교'],
	        vertical_spacing=0.10,
	        horizontal_spacing=0.08
	    )

	    # Align both frames: same number of rows, both indexed 0..n-1.
	    df_orig_slice = df_original.tail(len(df_noisy)).reset_index(drop=True)
	    df_noisy_reset = df_noisy.reset_index(drop=True)

	    min_len = min(len(df_orig_slice), len(df_noisy_reset))
	    df_orig_slice = df_orig_slice[:min_len]
	    df_noisy_reset = df_noisy_reset[:min_len]

	    # Shared x-axis: row position.
	    time_axis = list(range(len(df_orig_slice)))

	    original_color = '#2E86AB'
	    noise_color = '#F24236'

	    variables = ['tide_level', 'air_pres', 'wind_speed', 'air_temp', 'wind_dir']
	    positions = [(1, 1), (1, 2), (2, 1), (2, 2), (3, 1)]
	    var_units = ['cm', 'hPa', 'm/s', '°C', '°']

	    for var, (row, col), unit in zip(variables, positions, var_units):
	        if var not in df_orig_slice.columns or var not in df_noisy_reset.columns:
	            continue

	        try:
	            # Original series: solid line.
	            fig.add_trace(
	                go.Scatter(
	                    x=time_axis, y=df_orig_slice[var],
	                    name='🔵 원본',
	                    line=dict(color=original_color, width=3),
	                    showlegend=(row == 1 and col == 1),
	                    hovertemplate=f'원본 {var}: %{{y:.1f}}{unit}<br>시점: %{{x}}<extra></extra>'
	                ),
	                row=row, col=col
	            )

	            # Noisy series: dashed line.
	            fig.add_trace(
	                go.Scatter(
	                    x=time_axis, y=df_noisy_reset[var],
	                    name='🔴 노이즈',
	                    line=dict(color=noise_color, width=2.5, dash='dash'),
	                    showlegend=(row == 1 and col == 1),
	                    hovertemplate=f'노이즈 {var}: %{{y:.1f}}{unit}<br>시점: %{{x}}<extra></extra>'
	                ),
	                row=row, col=col
	            )

	            # Y range from the combined extremes. Centring on the means
	            # clipped one-sided excursions such as a storm surge.
	            orig_vals = df_orig_slice[var].dropna()
	            noisy_vals = df_noisy_reset[var].dropna()

	            if len(orig_vals) > 0 and len(noisy_vals) > 0:
	                y_low = min(orig_vals.min(), noisy_vals.min())
	                y_high = max(orig_vals.max(), noisy_vals.max())
	                span = y_high - y_low

	                if span > 0:
	                    padding = span * 0.1
	                    fig.update_yaxes(range=[y_low - padding, y_high + padding], row=row, col=col)

	        except Exception as e:
	            # Best effort: one broken panel must not prevent the others.
	            print(f"변수 {var} 플롯 생성 오류: {e}")

	    # Overall tide-level comparison in the last panel.
	    if 'tide_level' in df_orig_slice.columns and 'tide_level' in df_noisy_reset.columns:
	        try:
	            fig.add_trace(
	                go.Scatter(
	                    x=time_axis, y=df_orig_slice['tide_level'],
	                    name='🔵 원본 조위',
	                    line=dict(color=original_color, width=4),
	                    showlegend=True,
	                    hovertemplate='원본 조위: %{y:.1f}cm<br>시점: %{x}<extra></extra>'
	                ),
	                row=3, col=2
	            )
	            fig.add_trace(
	                go.Scatter(
	                    x=time_axis, y=df_noisy_reset['tide_level'],
	                    name='🔴 노이즈 조위',
	                    line=dict(color=noise_color, width=3, dash='dash'),
	                    showlegend=True,
	                    hovertemplate='노이즈 조위: %{y:.1f}cm<br>시점: %{x}<extra></extra>'
	                ),
	                row=3, col=2
	            )
	        except Exception as e:
	            print(f"전체 조위 비교 플롯 생성 오류: {e}")

	    # Shade the perturbed interval for the typhoon scenario.
	    if scenario_name == 'typhoon':
	        n_points = len(df_orig_slice)

	        # Rows where any plotted variable differs (NaN on both sides counts
	        # as equal). This follows the real window whatever intensity was used.
	        changed = None
	        for var in variables:
	            if var in df_orig_slice.columns and var in df_noisy_reset.columns:
	                before = df_orig_slice[var]
	                after = df_noisy_reset[var]
	                differs = before.ne(after) & ~(before.isna() & after.isna())
	                changed = differs if changed is None else (changed | differs)

	        changed_rows = np.flatnonzero(changed.to_numpy()) if changed is not None else []

	        if len(changed_rows) > 0:
	            start_idx = int(changed_rows[0])
	            end_idx = int(changed_rows[-1]) + 1
	        else:
	            # Fallback: nominal typhoon window for intensity 1.
	            if n_points >= 72:
	                typhoon_center = int(n_points * 0.7)
	                typhoon_duration = min(24, n_points // 3)
	            else:
	                typhoon_center = n_points // 2
	                typhoon_duration = min(12, n_points // 2)

	            start_idx = max(0, typhoon_center - typhoon_duration // 2)
	            end_idx = min(n_points, typhoon_center + typhoon_duration // 2)

	        print(f" 📍 시각화 태풍 구간: {start_idx}-{end_idx}")

	        try:
	            for row in range(1, 4):
	                for col in range(1, 3):
	                    fig.add_vrect(
	                        x0=start_idx, x1=end_idx,
	                        fillcolor="rgba(255,0,0,0.15)",
	                        layer="below", line_width=0,
	                        annotation_text="🌀 태풍 구간",
	                        annotation_position="top left",
	                        row=row, col=col
	                    )
	        except Exception as e:
	            print(f"태풍 구간 하이라이트 오류: {e}")

	    fig.update_layout(
	        title={
	            'text': f"🌪️ 노이즈 시나리오: {scenario_name}",
	            'x': 0.5,
	            'font': {'size': 20, 'color': '#2E86AB'}
	        },
	        height=900,
	        showlegend=True,
	        legend=dict(
	            x=0.02,
	            y=0.98,
	            bgcolor='rgba(255,255,255,0.8)',
	            bordercolor='gray',
	            borderwidth=1,
	            font=dict(size=12)
	        ),
	        plot_bgcolor='rgba(248,249,250,0.8)',
	        paper_bgcolor='white'
	    )

	    fig.update_xaxes(title_text="시간 순서", showgrid=True, gridcolor='lightgray')
	    fig.update_yaxes(showgrid=True, gridcolor='lightgray')

	    return fig

	def apply_noise_scenario(df, scenario_type, intensity=1.0):
	    """
	    Apply the named noise scenario to the last 144 rows of ``df``.

	    Args:
	        df: Source DataFrame; only its last 144 rows are used, as a copy.
	        scenario_type: One of ``'typhoon'``, ``'sensor_malfunction'``,
	            ``'burst_missing'`` or ``'extreme_weather'``.
	        intensity: Passed unchanged to the scenario generator.

	    Returns:
	        Tuple ``(df_noisy, plot)``: the perturbed slice and the comparison
	        figure of the slice against its perturbed version.

	    Raises:
	        ValueError: If ``scenario_type`` is not a known scenario.
	    """
	    generators = {
	        'typhoon': generate_typhoon_scenario,
	        'sensor_malfunction': generate_sensor_malfunction_scenario,
	        'burst_missing': generate_burst_missing_scenario,
	        'extreme_weather': generate_extreme_weather_scenario
	    }

	    generator = generators.get(scenario_type)
	    if generator is None:
	        raise ValueError(f"Unknown scenario type: {scenario_type}")

	    print(f"\n🌪️ {scenario_type} 시나리오 적용 중...")
	    print(f"📊 원본 데이터: {len(df)}행")

	    # Step 1: keep only the tail window before any noise is added.
	    window = df.tail(144).copy()
	    print(f"✂️ 슬라이싱 후: {len(window)}행 (마지막 144개)")

	    # Step 2: perturb the window with the selected generator.
	    perturbed = generator(window, intensity)

	    # Step 3: figure comparing the clean window with the perturbed one.
	    figure = create_noise_comparison_plot(window, perturbed, scenario_type)

	    return perturbed, figure