import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from config import PERIODS, METHODS, COLORS
# Sentinel option label meaning "no filter" in every select box ("all").
_ALL_LABEL = "전체"

# Metrics whose "AGG" value is sum(numerator column) / sum(denominator column)
# over the group, instead of a statistic of the per-company ratios.
_AGG_RATIO_COLUMNS = {
    "PER": ("Market Cap (2024-12-31)_USD", "Net_Income"),
    "PBR": ("Market Cap (2024-12-31)_USD", "Book"),
    "EV_EBITDA": ("Enterprise Value (FQ0)_USD", "EBITDA"),
}


def filter_data(
    df: pd.DataFrame,
    period,
    level1_column: str,
    level1_value,
    country,
    market,
) -> pd.DataFrame:
    """Return the rows of *df* that match the current selections.

    Args:
        df: Raw data; must contain a ``Year`` column, plus ``level1_column``,
            ``Country`` and ``Market`` whenever the matching selection is not
            the "all" label.
        period: Value compared for equality against ``Year``.
        level1_column: Column holding the top classification level
            (``"Sector"`` or ``"Theme"``).
        level1_value: Selected top-level value, or the "all" label.
        country: Selected country, or the "all" label.
        market: Selected exchange, or the "all" label.

    Returns:
        A filtered copy of *df*; the input frame is never modified.
    """
    mask = df["Year"] == period
    optional_filters = (
        (level1_column, level1_value),
        ("Country", country),
        ("Market", market),
    )
    for column, selected in optional_filters:
        # A column is only touched when its filter is active, so data without
        # that column still works for the unfiltered case.
        if selected != _ALL_LABEL:
            mask = mask & (df[column] == selected)
    return df[mask].copy()


def harmonic_mean(arr: pd.Series) -> float:
    """Return the harmonic mean of the strictly positive, non-null values.

    Null and non-positive entries are discarded before averaging. NaN is
    returned when nothing usable remains.
    """
    positive = arr.dropna()
    positive = positive[positive > 0]
    if positive.empty:
        return np.nan
    return len(positive) / (1 / positive).sum()


def aggregate_by_group(sub: pd.DataFrame, metric_col: str) -> pd.Series:
    """Summarise one group's metric with every supported calculation method.

    Rows are de-duplicated on ``ticker`` first so each company counts once.

    Args:
        sub: Rows belonging to a single group.
        metric_col: Name of the metric column to summarise; values that
            cannot be parsed as numbers are treated as missing.

    Returns:
        A Series with the keys ``AVG`` (mean), ``MED`` (median), ``HRM``
        (harmonic mean of positive values), ``AGG`` and ``기업 수`` (number
        of companies with a usable value). ``AGG`` is the ratio of summed
        numerator to summed denominator for PER / PBR / EV_EBITDA and falls
        back to ``AVG`` for any other metric or when the denominator sums
        to zero.
    """
    unique_rows = sub.drop_duplicates(subset=["ticker"])
    values = pd.to_numeric(unique_rows[metric_col], errors="coerce")

    result = {
        "AVG": values.mean(),
        "MED": values.median(),
        "HRM": harmonic_mean(values),
    }
    result["AGG"] = result["AVG"]

    # Keyed on the metric_col argument (not an outer variable) so the
    # function is self-contained.
    ratio_columns = _AGG_RATIO_COLUMNS.get(metric_col)
    if ratio_columns is not None:
        numerator_col, denominator_col = ratio_columns
        numerator = unique_rows[numerator_col].sum()
        denominator = unique_rows[denominator_col].sum()
        if denominator != 0:
            result["AGG"] = numerator / denominator

    result["기업 수"] = int(values.count())
    return pd.Series(result)


def show_scatter(DF_RAW: pd.DataFrame) -> None:
    """Render the "Scatter Plot" page.

    Sidebar widgets choose a classification scheme (EMSEC: Sector/Industry,
    EMTEC: Theme/Technology), a period, a metric, a country and an exchange.
    The matching rows of ``DF_RAW`` are grouped by the second classification
    level and one marker per group is drawn for every method in ``METHODS``.

    Args:
        DF_RAW: Source data. The code reads the columns ``Year``, ``Sector``
            or ``Theme``, ``Industry`` or ``Technology``, ``Country``,
            ``Market``, ``ticker`` and the selected metric column.

    The Streamlit script run is stopped early (``st.stop``) when the metric
    column is missing or when no data is left to plot.
    """
    st.header("Scatter Plot")

    metric_hierarchy = {
        "Valuation": ["PER", "PBR", "EV_EBITDA", "시가총액/매출액", "시가총액/영업이익"],
        "Profitability": ["ROE", "영업이익률", "EBITDA/Sales", "총자산이익률"],
        "Activity": ["자산회전율"],
        "Stability": ["자기자본비율", "부채비율"],
    }

    with st.sidebar:
        st.markdown("### 분류")
        classification_type = st.radio(
            "분석 기준", ["EMSEC", "EMTEC"], horizontal=True, key="class_type2"
        )
        if classification_type == "EMSEC":
            l1_label, l2_label, l1_key = "Sector", "Industry", "sector_sel2"
        else:
            l1_label, l2_label, l1_key = "Theme", "Technology", "theme_sel2"
        # Options come straight from the Sector/Theme column; DF_RAW is only
        # read here, so no defensive copy is needed.
        l1_options = [_ALL_LABEL] + sorted(DF_RAW[l1_label].dropna().unique())
        l1_selection = st.selectbox(l1_label, l1_options, key=l1_key)

        st.markdown("### 설정")
        period_sel = st.selectbox("기간", PERIODS, key="period_sel2")
        metric_group = st.selectbox(
            "지표 그룹", list(metric_hierarchy), key="metric_group2"
        )
        metric_sel = st.selectbox(
            "지표", metric_hierarchy[metric_group], key="metric_sel2"
        )
        country_sel = st.selectbox(
            "국가", [_ALL_LABEL, "한국", "미국", "일본"], key="country_sel2"
        )
        market_pool_options = {
            "전체": [_ALL_LABEL] + sorted(DF_RAW["Market"].dropna().unique()),
            "한국": ["전체", "KOSPI", "KOSDAQ"],
            "미국": ["전체", "NASDAQ", "NYSE"],
            "일본": [
                "전체",
                "Prime (Domestic Stocks)",
                "Standard (Domestic Stocks)",
                "Prime (Foreign Stocks)",
            ],
        }
        market_sel = st.selectbox(
            "거래소",
            market_pool_options.get(country_sel, [_ALL_LABEL]),
            key="market_sel2",
        )

    filtered = filter_data(
        DF_RAW,
        period=period_sel,
        level1_column=l1_label,
        level1_value=l1_selection,
        country=country_sel,
        market=market_sel,
    )

    if metric_sel not in filtered.columns:
        st.error(f"'{metric_sel}' 열이 데이터에 없습니다. 데이터나 설정을 확인해주세요.")
        st.stop()

    if filtered.empty:
        st.warning("선택하신 조건에 맞는 데이터가 없습니다.")
        st.stop()

    agg_df = filtered.groupby(l2_label).apply(
        lambda group: aggregate_by_group(group, metric_sel)
    )
    # Drop groups for which no method produced a value.
    agg_df = agg_df.dropna(how="all", subset=METHODS).sort_index()

    st.caption(f"분석 기준: {classification_type} > {l1_selection}")

    if agg_df.empty:
        st.warning("집계 결과 데이터가 없어 차트를 그릴 수 없습니다.")
        st.stop()

    # Identical for every trace, so build it once outside the loop.
    company_counts = agg_df[["기업 수"]].to_numpy()

    fig = go.Figure()
    for method in METHODS:
        # Plotly hover text is HTML-like: <br> separates lines and
        # <extra></extra> hides the secondary trace-name box (the method is
        # already spelled out in the text itself).
        hover_text = (
            f"{l2_label}: %{{x}}<br>"
            f"{metric_sel}: %{{y:.2f}}<br>"
            f"계산 방식: {method}<br>"
            f"기업 수: %{{customdata[0]}}"
            "<extra></extra>"
        )
        fig.add_trace(
            go.Scatter(
                x=agg_df.index,
                y=agg_df[method],
                customdata=company_counts,
                mode="markers",
                marker=dict(
                    size=12,
                    color=COLORS[method],
                    line=dict(width=1, color="white"),
                ),
                name=method,
                hovertemplate=hover_text,
            )
        )

    y_axis_title = f"{metric_sel} ({period_sel})"
    fig.update_layout(
        title=dict(
            text=f"{l2_label}별 '{y_axis_title}' 비교 (계산 방식별)",
            x=0.5,
            xanchor="center",
        ),
        xaxis_title=l2_label,
        yaxis_title=y_axis_title,
        xaxis_tickangle=-45,
        legend_title="계산 방식",
        height=650,
        hovermode="x unified",
    )
    st.plotly_chart(fig, use_container_width=True)

    # Footer: list the active selections, skipping empty and "all" entries.
    # str() keeps join() from failing when the period is not a string.
    selections = [
        classification_type,
        l1_selection,
        period_sel,
        metric_sel,
        country_sel,
        market_sel,
    ]
    summary = " | ".join(str(v) for v in selections if v and v != _ALL_LABEL)
    st.caption(summary + f" • 그룹 수: {len(agg_df):,}")