ever-flow's picture
Upload 12 files
bd24fca verified
Raw
History Blame Contribute Delete
5.9 kB
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from config import PERIODS, METHODS, COLORS
# Sentinel option meaning "no filter" in every sidebar selectbox.
_ALL = "전체"

# Key under which each group's company count is stored in the aggregate.
_COUNT_KEY = "기업 수"

# Metric groups offered in the sidebar, mapped to the DF_RAW column names
# that belong to each group.
_METRIC_HIERARCHY = {
    "Valuation": ["PER", "PBR", "EV_EBITDA", "시가총액/매출액", "시가총액/영업이익"],
    "Profitability": ["ROE", "영업이익률", "EBITDA/Sales", "총자산이익률"],
    "Activity": ["자산회전율"],
    "Stability": ["자기자본비율", "부채비율"],
}

# Metrics whose "AGG" value is sum(numerator) / sum(denominator) over the
# group, mapped to their (numerator column, denominator column).
_AGG_RATIO_COLUMNS = {
    "PER": ("Market Cap (2024-12-31)_USD", "Net_Income"),
    "PBR": ("Market Cap (2024-12-31)_USD", "Book"),
    "EV_EBITDA": ("Enterprise Value (FQ0)_USD", "EBITDA"),
}


def _harmonic_mean(values: pd.Series) -> float:
    """Return the harmonic mean of the non-null, strictly positive values.

    Returns NaN when no positive value remains after filtering.
    """
    positive = values.dropna()
    positive = positive[positive > 0]
    if len(positive) == 0:
        return np.nan
    return len(positive) / (1 / positive).sum()


def _aggregate_by_group(sub: pd.DataFrame, metric_col: str) -> pd.Series:
    """Summarise one group's metric with several aggregation methods.

    Rows are de-duplicated on "ticker" first. The result holds "AVG", "MED",
    "HRM", "AGG" and the count of non-null metric values, in that order.
    "AGG" is the ratio of summed numerator to summed denominator for the
    metrics listed in _AGG_RATIO_COLUMNS; for every other metric, or when
    the denominator sums to zero, it falls back to "AVG".
    """
    unique_rows = sub.drop_duplicates(subset=["ticker"])
    values = pd.to_numeric(unique_rows[metric_col], errors="coerce")
    result = {
        "AVG": values.mean(),
        "MED": values.median(),
        "HRM": _harmonic_mean(values),
    }

    aggregate = None
    ratio_columns = _AGG_RATIO_COLUMNS.get(metric_col)
    if ratio_columns is not None:
        numerator_col, denominator_col = ratio_columns
        numerator = unique_rows[numerator_col].sum()
        denominator = unique_rows[denominator_col].sum()
        if denominator != 0:
            aggregate = numerator / denominator
    result["AGG"] = result["AVG"] if aggregate is None else aggregate

    result[_COUNT_KEY] = len(values.dropna())
    return pd.Series(result)


def _filter_data(df: pd.DataFrame, *, period, l1_column, l1_selection, country, market) -> pd.DataFrame:
    """Return a copy of the rows of *df* matching the sidebar selections.

    Rows are always restricted to ``Year == period``. Each of the other
    selections is applied only when it differs from the "all" sentinel.
    """
    filtered = df[df.Year == period].copy()
    selections = (
        (l1_column, l1_selection),
        ("Country", country),
        ("Market", market),
    )
    for column, choice in selections:
        if choice != _ALL:
            filtered = filtered[filtered[column] == choice]
    return filtered


def show_scatter(DF_RAW: pd.DataFrame) -> None:
    """Render the "Scatter Plot" page.

    Builds the sidebar filters, filters DF_RAW accordingly, aggregates the
    chosen metric per second-level classification ("Industry" for EMSEC,
    "Technology" for EMTEC) and plots one marker trace per entry of METHODS.

    Columns read from DF_RAW: "Year", "Sector" or "Theme", "Industry" or
    "Technology", "Country", "Market", "ticker", the selected metric column
    and, for PER / PBR / EV_EBITDA, the columns named in _AGG_RATIO_COLUMNS.

    The page is halted via ``st.stop()`` when the metric column is missing,
    when no row matches the filters, or when the aggregate is empty.
    """
    st.header("Scatter Plot")

    with st.sidebar:
        st.markdown("### 분류")
        classification_type = st.radio(
            "분석 기준", ["EMSEC", "EMTEC"], horizontal=True, key="class_type2"
        )
        # Option lists come from the Sector/Theme columns; a "Classification"
        # column is not used directly.
        if classification_type == "EMSEC":
            l1_label, l2_label, l1_key = "Sector", "Industry", "sector_sel2"
        else:
            l1_label, l2_label, l1_key = "Theme", "Technology", "theme_sel2"
        l1_options = [_ALL] + sorted(DF_RAW[l1_label].dropna().unique())
        l1_selection = st.selectbox(l1_label, l1_options, key=l1_key)

        st.markdown("### 설정")
        period_sel = st.selectbox("기간", PERIODS, key="period_sel2")
        metric_group = st.selectbox(
            "지표 그룹", list(_METRIC_HIERARCHY), key="metric_group2"
        )
        metric_sel = st.selectbox(
            "지표", _METRIC_HIERARCHY[metric_group], key="metric_sel2"
        )
        country_sel = st.selectbox(
            "국가", [_ALL, "한국", "미국", "일본"], key="country_sel2"
        )
        market_pool_options = {
            _ALL: [_ALL] + sorted(DF_RAW["Market"].dropna().unique()),
            "한국": [_ALL, "KOSPI", "KOSDAQ"],
            "미국": [_ALL, "NASDAQ", "NYSE"],
            "일본": [
                _ALL,
                "Prime (Domestic Stocks)",
                "Standard (Domestic Stocks)",
                "Prime (Foreign Stocks)",
            ],
        }
        market_sel = st.selectbox(
            "거래소", market_pool_options.get(country_sel, [_ALL]), key="market_sel2"
        )

    FILT_DATA = _filter_data(
        DF_RAW,
        period=period_sel,
        l1_column=l1_label,
        l1_selection=l1_selection,
        country=country_sel,
        market=market_sel,
    )

    metric_col = metric_sel
    if metric_col not in FILT_DATA.columns:
        st.error(f"'{metric_col}' 열이 데이터에 없습니다. 데이터나 설정을 확인해주세요.")
        st.stop()

    if FILT_DATA.empty:
        st.warning("선택하신 조건에 맞는 데이터가 없습니다.")
        st.stop()

    agg_df = FILT_DATA.groupby(l2_label).apply(
        lambda group: _aggregate_by_group(group, metric_col)
    )
    # Skip the dropna when nothing was aggregated: an empty result may lack
    # the METHODS columns, which dropna(subset=...) would reject. The empty
    # case is reported just below.
    if not agg_df.empty:
        agg_df = agg_df.dropna(how="all", subset=METHODS).sort_index()

    st.caption(f"분석 기준: {classification_type} > {l1_selection}")

    if agg_df.empty:
        st.warning("집계 결과 데이터가 없어 차트를 그릴 수 없습니다.")
        st.stop()

    fig = go.Figure()
    index_name = agg_df.index.name
    # Identical for every trace, so build it once outside the loop.
    company_counts = agg_df[[_COUNT_KEY]].to_numpy()
    for method in METHODS:
        # Doubled braces emit literal "%{...}" placeholders for Plotly.
        hover = (
            f"<b>{index_name}:</b> %{{x}}<br>"
            f"<b>{metric_sel}:</b> %{{y:.2f}}<br>"
            f"<b>계산 방식:</b> {method}<br>"
            f"<b>{_COUNT_KEY}:</b> %{{customdata[0]}}<br>"
            "<extra></extra>"
        )
        fig.add_trace(
            go.Scatter(
                x=agg_df.index,
                y=agg_df[method],
                customdata=company_counts,
                mode="markers",
                marker=dict(
                    size=12,
                    color=COLORS[method],
                    line=dict(width=1, color="white"),
                ),
                name=method,
                hovertemplate=hover,
            )
        )

    y_axis_title = f"{metric_sel} ({period_sel})"
    fig.update_layout(
        title=dict(
            text=f"{l2_label}별 '{y_axis_title}' 비교 (계산 방식별)",
            x=0.5,
            xanchor="center",
        ),
        xaxis_title=l2_label,
        yaxis_title=y_axis_title,
        xaxis_tickangle=-45,
        legend_title="계산 방식",
        height=650,
        hovermode="x unified",
    )
    st.plotly_chart(fig, use_container_width=True)

    # Footer: list the active selections, omitting empty ones and the "all"
    # sentinel. str() keeps the join safe if a period value is not a string.
    selections = (
        classification_type,
        l1_selection,
        period_sel,
        metric_sel,
        country_sel,
        market_sel,
    )
    sel_list = [str(v) for v in selections if v and v != _ALL]
    st.caption(" | ".join(sel_list) + f" • 그룹 수: {len(agg_df):,}")