import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from config import PERIODS, METHODS, COLORS

# Metrics whose "AGG" value is a ratio of group totals:
# metric name -> (numerator column, denominator column).
_AGG_COMPONENTS = {
    "PER": ("Market Cap (2024-12-31)_USD", "Net_Income"),
    "PBR": ("Market Cap (2024-12-31)_USD", "Book"),
    "EV_EBITDA": ("Enterprise Value (FQ0)_USD", "EBITDA"),
}


def _harmonic_mean(values: pd.Series) -> float:
    """Return the harmonic mean of the strictly positive, non-null values.

    Returns NaN when no positive value is present.
    """
    positive = values.dropna()
    positive = positive[positive > 0]
    if positive.empty:
        return np.nan
    return len(positive) / (1 / positive).sum()


def _aggregate_group(group: pd.DataFrame, metric: str) -> dict:
    """Summarise one group of rows for ``metric``.

    Rows are de-duplicated on ``ticker`` first. The result holds the mean
    ("AVG"), median ("MED"), harmonic mean ("HRM"), the ratio of group totals
    ("AGG") and the number of rows with a usable metric value ("기업 수").

    "AGG" falls back to "AVG" when the metric has no numerator/denominator
    definition, when one of those columns is absent, or when the summed
    denominator is zero.
    """
    unique_rows = group.drop_duplicates(subset=["ticker"])
    values = pd.to_numeric(unique_rows[metric], errors="coerce")

    summary = {
        "AVG": values.mean(),
        "MED": values.median(),
        "HRM": _harmonic_mean(values),
    }

    aggregate = summary["AVG"]
    components = _AGG_COMPONENTS.get(metric)
    if components is not None and all(c in unique_rows.columns for c in components):
        numerator_col, denominator_col = components
        # Coerce so stray non-numeric cells cannot turn the sum into garbage.
        numerator = pd.to_numeric(unique_rows[numerator_col], errors="coerce").sum()
        denominator = pd.to_numeric(unique_rows[denominator_col], errors="coerce").sum()
        if denominator != 0:
            aggregate = numerator / denominator
    summary["AGG"] = aggregate

    summary["기업 수"] = int(values.notna().sum())
    return summary


def show_scatter(DF_RAW: pd.DataFrame) -> None:
    """Render the "Scatter Plot" page.

    Sidebar widgets pick a classification scheme (EMSEC: Sector/Industry,
    EMTEC: Theme/Technology), a period, a metric, a country and a market.
    ``DF_RAW`` is filtered accordingly, grouped by the second-level
    classification column, and one marker trace per entry of ``METHODS`` is
    drawn.

    Args:
        DF_RAW: Source data. The code reads the columns ``Year``, ``Market``,
            ``Country``, ``ticker``, the selected classification columns and
            the selected metric column.

    The Streamlit script run is stopped (``st.stop()``) with a message when a
    required column is missing or when no data remains after filtering or
    aggregation.
    """
    st.header("Scatter Plot")

    METRIC_HIERARCHY = {
        "Valuation": ["PER", "PBR", "EV_EBITDA", "시가총액/매출액", "시가총액/영업이익"],
        "Profitability": ["ROE", "영업이익률", "EBITDA/Sales", "총자산이익률"],
        "Activity": ["자산회전율"],
        "Stability": ["자기자본비율", "부채비율"],
    }

    with st.sidebar:
        st.markdown("### 분류")
        classification_type = st.radio(
            "분석 기준", ["EMSEC", "EMTEC"], horizontal=True, key="class_type2"
        )

        # The "Classification" column is not used directly; the scheme decides
        # which pair of columns acts as level 1 (filter) and level 2 (grouping).
        if classification_type == "EMSEC":
            l1_label, l2_label, l1_key = "Sector", "Industry", "sector_sel2"
        else:
            l1_label, l2_label, l1_key = "Theme", "Technology", "theme_sel2"
        l1_options = ["전체"] + sorted(DF_RAW[l1_label].dropna().unique())
        l1_selection = st.selectbox(l1_label, l1_options, key=l1_key)

        st.markdown("### 설정")
        period_sel = st.selectbox("기간", PERIODS, key="period_sel2")
        metric_group = st.selectbox(
            "지표 그룹", list(METRIC_HIERARCHY.keys()), key="metric_group2"
        )
        metric_sel = st.selectbox(
            "지표", METRIC_HIERARCHY[metric_group], key="metric_sel2"
        )
        country_sel = st.selectbox(
            "국가", ["전체", "한국", "미국", "일본"], key="country_sel2"
        )
        market_pool_options = {
            "전체": ["전체"] + sorted(DF_RAW["Market"].dropna().unique()),
            "한국": ["전체", "KOSPI", "KOSDAQ"],
            "미국": ["전체", "NASDAQ", "NYSE"],
            "일본": [
                "전체",
                "Prime (Domestic Stocks)",
                "Standard (Domestic Stocks)",
                "Prime (Foreign Stocks)",
            ],
        }
        market_sel = st.selectbox(
            "거래소", market_pool_options.get(country_sel, ["전체"]), key="market_sel2"
        )

    # Build one boolean mask from the sidebar selections; "전체" means "no filter".
    mask = DF_RAW["Year"] == period_sel
    if l1_selection != "전체":
        mask &= DF_RAW[l1_label] == l1_selection
    if country_sel != "전체":
        mask &= DF_RAW["Country"] == country_sel
    if market_sel != "전체":
        mask &= DF_RAW["Market"] == market_sel
    filtered = DF_RAW[mask].copy()

    # Both the metric column and the grouping column must exist.
    for required in (metric_sel, l2_label):
        if required not in filtered.columns:
            st.error(f"'{required}' 열이 데이터에 없습니다. 데이터나 설정을 확인해주세요.")
            st.stop()

    if filtered.empty:
        st.warning("선택하신 조건에 맞는 데이터가 없습니다.")
        st.stop()

    # Iterate the groups explicitly instead of groupby.apply: the result has a
    # predictable shape and the count column stays an integer.
    rows = {
        key: _aggregate_group(group, metric_sel)
        for key, group in filtered.groupby(l2_label)
    }

    st.caption(f"분석 기준: {classification_type} > {l1_selection}")

    if not rows:
        # Every row had a missing grouping value, so there is nothing to plot.
        st.warning("집계 결과 데이터가 없어 차트를 그릴 수 없습니다.")
        st.stop()

    agg_df = pd.DataFrame.from_dict(rows, orient="index")
    agg_df.index.name = l2_label
    agg_df = agg_df.dropna(how="all", subset=METHODS).sort_index()

    if agg_df.empty:
        st.warning("집계 결과 데이터가 없어 차트를 그릴 수 없습니다.")
        st.stop()

    counts = agg_df[["기업 수"]].to_numpy()
    fig = go.Figure()
    for method in METHODS:
        # Plotly hover text is HTML-like: lines are separated with <br>, and an
        # empty <extra> tag hides the secondary box (the method is already shown).
        hover = (
            f"{l2_label}: %{{x}}<br>"
            f"{metric_sel}: %{{y:.2f}}<br>"
            f"계산 방식: {method}<br>"
            "기업 수: %{customdata[0]}"
            "<extra></extra>"
        )
        fig.add_trace(
            go.Scatter(
                x=agg_df.index,
                y=agg_df[method],
                customdata=counts,
                mode="markers",
                marker=dict(
                    size=12,
                    color=COLORS[method],
                    line=dict(width=1, color="white"),
                ),
                name=method,
                hovertemplate=hover,
            )
        )

    y_axis_title = f"{metric_sel} ({period_sel})"
    fig.update_layout(
        title=dict(
            text=f"{l2_label}별 '{y_axis_title}' 비교 (계산 방식별)",
            x=0.5,
            xanchor="center",
        ),
        xaxis_title=l2_label,
        yaxis_title=y_axis_title,
        xaxis_tickangle=-45,
        legend_title="계산 방식",
        height=650,
        hovermode="x unified",
    )
    st.plotly_chart(fig, use_container_width=True)

    # Summarise the active selections; str() because the period may not be a string.
    selections = (
        classification_type,
        l1_selection,
        period_sel,
        metric_sel,
        country_sel,
        market_sel,
    )
    sel_list = [str(v) for v in selections if v and v != "전체"]
    st.caption(" | ".join(sel_list) + f" • 그룹 수: {len(agg_df):,}")