Spaces:

ever-flow
/

visualization_modules

Build error

App Files Files Community

visualization_modules / src /visualizations /scatter.py

ever-flow

Upload 12 files

bd24fca verified 11 months ago

Raw

History Blame Contribute Delete

5.9 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.graph_objects as go

	from config import PERIODS, METHODS, COLORS


	def show_scatter(DF_RAW: pd.DataFrame) -> None:
	    """Render the "Scatter Plot" page.

	    Sidebar widgets select a classification scheme (EMSEC: Sector/Industry,
	    EMTEC: Theme/Technology), a period, a metric, a country and a market.
	    The rows of ``DF_RAW`` matching those selections are grouped by the
	    second-level label ("Industry" or "Technology"); for every group the
	    selected metric is summarised (AVG, MED, HRM, AGG plus a company count)
	    and one marker trace per entry of ``METHODS`` is drawn.

	    Args:
	        DF_RAW: Source data. The code reads the columns "Year", "Country",
	            "Market", "ticker", "Sector" or "Theme", "Industry" or
	            "Technology", and the selected metric column. For PER, PBR and
	            EV_EBITDA it also reads the numerator/denominator columns
	            listed in ``AGG_COLUMNS`` below.

	    Returns:
	        None. Output is written to the Streamlit page. When a required
	        column is missing or no data is left after filtering, a message is
	        shown and ``st.stop()`` ends the run.
	    """
	    st.header("Scatter Plot")

	    METRIC_HIERARCHY = {
	        "Valuation": ["PER", "PBR", "EV_EBITDA", "시가총액/매출액", "시가총액/영업이익"],
	        "Profitability": ["ROE", "영업이익률", "EBITDA/Sales", "총자산이익률"],
	        "Activity": ["자산회전율"],
	        "Stability": ["자기자본비율", "부채비율"],
	    }

	    # Metrics with a pooled aggregate: metric -> (numerator column,
	    # denominator column); AGG is sum(numerator) / sum(denominator).
	    # NOTE(review): the market-cap column name is pinned to 2024-12-31 and
	    # is used whatever period is selected - confirm this is intended.
	    AGG_COLUMNS = {
	        "PER": ("Market Cap (2024-12-31)_USD", "Net_Income"),
	        "PBR": ("Market Cap (2024-12-31)_USD", "Book"),
	        "EV_EBITDA": ("Enterprise Value (FQ0)_USD", "EBITDA"),
	    }

	    with st.sidebar:
	        st.markdown("### 분류")
	        classification_type = st.radio(
	            "분석 기준", ["EMSEC", "EMTEC"], horizontal=True, key="class_type2"
	        )
	        cls_df = DF_RAW.copy()  # the Classification column is not used directly
	        if classification_type == "EMSEC":
	            l1_label, l2_label, l1_key = "Sector", "Industry", "sector_sel2"
	        else:
	            l1_label, l2_label, l1_key = "Theme", "Technology", "theme_sel2"
	        l1_options = ["전체"] + sorted(cls_df[l1_label].dropna().unique())
	        l1_selection = st.selectbox(l1_label, l1_options, key=l1_key)

	        st.markdown("### 설정")
	        period_sel = st.selectbox("기간", PERIODS, key="period_sel2")
	        metric_group = st.selectbox(
	            "지표 그룹", list(METRIC_HIERARCHY.keys()), key="metric_group2"
	        )
	        metric_options = METRIC_HIERARCHY[metric_group]
	        metric_sel = st.selectbox("지표", metric_options, key="metric_sel2")
	        country_sel = st.selectbox(
	            "국가", ["전체", "한국", "미국", "일본"], key="country_sel2"
	        )
	        market_pool_options = {
	            "전체": ["전체"] + sorted(cls_df["Market"].dropna().unique()),
	            "한국": ["전체", "KOSPI", "KOSDAQ"],
	            "미국": ["전체", "NASDAQ", "NYSE"],
	            "일본": [
	                "전체",
	                "Prime (Domestic Stocks)",
	                "Standard (Domestic Stocks)",
	                "Prime (Foreign Stocks)",
	            ],
	        }
	        market_sel = st.selectbox(
	            "거래소", market_pool_options.get(country_sel, ["전체"]), key="market_sel2"
	        )

	    def filter_data(df: pd.DataFrame) -> pd.DataFrame:
	        """Return the rows of ``df`` matching the sidebar selections."""
	        d = df[df.Year == period_sel].copy()
	        # l1_label is "Sector" for EMSEC and "Theme" for EMTEC, so a single
	        # filter covers both classification schemes.
	        if l1_selection != "전체":
	            d = d[d[l1_label] == l1_selection]
	        if country_sel != "전체":
	            d = d[d["Country"] == country_sel]
	        if market_sel != "전체":
	            d = d[d["Market"] == market_sel]
	        return d

	    FILT_DATA = filter_data(DF_RAW)
	    metric_col = metric_sel
	    if metric_col not in FILT_DATA.columns:
	        st.error(f"'{metric_col}' 열이 데이터에 없습니다. 데이터나 설정을 확인해주세요.")
	        st.stop()

	    def harmonic_mean(arr: pd.Series):
	        """Harmonic mean of the strictly positive, non-null values (NaN if none)."""
	        positive = arr.dropna()
	        positive = positive[positive > 0]
	        if len(positive) == 0:
	            return np.nan
	        return len(positive) / (1 / positive).sum()

	    def aggregate_by_group(sub: pd.DataFrame, metric_col: str) -> pd.Series:
	        """Summarise ``metric_col`` for one group.

	        Rows are de-duplicated on "ticker" first. The result holds AVG, MED,
	        HRM, AGG and the number of non-null metric values ("기업 수").
	        """
	        sub_unique = sub.drop_duplicates(subset=["ticker"])
	        arr = pd.to_numeric(sub_unique[metric_col], errors="coerce")
	        res = {
	            "AVG": arr.mean(),
	            "MED": arr.median(),
	            "HRM": harmonic_mean(arr),
	        }
	        # AGG falls back to the plain mean when the metric has no pooled
	        # definition or the pooled denominator is zero.
	        agg_value = res["AVG"]
	        if metric_col in AGG_COLUMNS:
	            num_col, den_col = AGG_COLUMNS[metric_col]
	            num = sub_unique[num_col].sum()
	            den = sub_unique[den_col].sum()
	            if den != 0:
	                agg_value = num / den
	        res["AGG"] = agg_value
	        res["기업 수"] = len(arr.dropna())
	        return pd.Series(res)

	    if FILT_DATA.empty:
	        st.warning("선택하신 조건에 맞는 데이터가 없습니다.")
	        st.stop()

	    # Guard the grouping column the same way as the metric column, so a
	    # missing column yields a readable message instead of a KeyError.
	    if l2_label not in FILT_DATA.columns:
	        st.error(f"'{l2_label}' 열이 데이터에 없습니다. 데이터나 설정을 확인해주세요.")
	        st.stop()

	    agg_df = FILT_DATA.groupby(l2_label).apply(
	        lambda g: aggregate_by_group(g, metric_col)
	    )
	    agg_df = agg_df.dropna(how='all', subset=METHODS).sort_index()

	    st.caption(f"분석 기준: {classification_type} > {l1_selection}")
	    if agg_df.empty:
	        st.warning("집계 결과 데이터가 없어 차트를 그릴 수 없습니다.")
	        st.stop()

	    fig = go.Figure()
	    # The company count is identical for every trace; build it once.
	    company_counts = agg_df[['기업 수']].to_numpy()
	    for method in METHODS:
	        fig.add_trace(go.Scatter(
	            x=agg_df.index,
	            y=agg_df[method],
	            customdata=company_counts,
	            mode="markers",
	            marker=dict(size=12, color=COLORS[method], line=dict(width=1, color="white")),
	            name=method,
	            hovertemplate=f"<b>{agg_df.index.name}:</b> %{{x}}<br><b>{metric_sel}:</b> %{{y:.2f}}<br><b>계산 방식:</b> {method}<br><b>기업 수:</b> %{{customdata[0]}}<br><extra></extra>"
	        ))
	    y_axis_title = f"{metric_sel} ({period_sel})"
	    fig.update_layout(
	        title=dict(text=f"{l2_label}별 '{y_axis_title}' 비교 (계산 방식별)", x=0.5, xanchor='center'),
	        xaxis_title=l2_label,
	        yaxis_title=y_axis_title,
	        xaxis_tickangle=-45,
	        legend_title="계산 방식",
	        height=650,
	        hovermode="x unified"
	    )
	    st.plotly_chart(fig, use_container_width=True)

	    # Footer: the active selections, skipping empty values and "전체" (all).
	    # Values are stringified because str.join rejects non-str items (the
	    # period may not be a string).
	    selections = [
	        classification_type,
	        l1_selection,
	        period_sel,
	        metric_sel,
	        country_sel,
	        market_sel,
	    ]
	    sel_list = [str(v) for v in selections if v and v != "전체"]
	    # "\\|" keeps the literal backslash-pipe the caption has always emitted,
	    # without relying on the invalid "\|" escape sequence.
	    st.caption(" \\| ".join(sel_list) + f" • 그룹 수: {len(agg_df):,}")