# Spaces: Build error
# File size: 6,443 Bytes
# bd24fca
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from typing import List
def make_edges(max_val: float) -> List[float]:
    """Return ascending bin edges that cover the range ``[0, max_val]``.

    Edges follow the pattern 10, 30, 60, 100, 300, 600 multiplied by
    successive powers of ten (so 10, 30, 60, 100, 300, 600, 1000, 3000, ...).
    Every pattern value that does not exceed ``max_val`` becomes an edge.

    Args:
        max_val: Largest value that has to fall inside a bin.

    Returns:
        A sorted list that starts with 0 and ends with ``np.inf``. A
        non-positive or non-finite ``max_val`` yields ``[0, 10, inf]``.
    """
    base = (10, 30, 60, 100, 300, 600)
    # NaN / inf would make the "edge > max_val" exit test below never fire,
    # so they are handled up front instead of looping forever.
    if not np.isfinite(max_val) or max_val <= 0:
        return [0, base[0], np.inf]

    # A set absorbs the overlap between decades (100/300/600 appear twice).
    edges = {0}
    factor = 1
    while True:
        for b in base:
            edge = b * factor
            if edge > max_val:
                return sorted(edges) + [np.inf]
            edges.add(edge)
        factor *= 10


def show_scale_heatmap(DF_RAW: pd.DataFrame) -> None:
    """Render the size-variable heatmap page.

    Shows selector widgets (year, market, classification hierarchy, metric),
    filters ``DF_RAW`` accordingly, bins the chosen metric (divided by 1e9)
    and draws a heatmap of distinct ``Company`` counts per classification
    value and size bin, followed by a caption and the underlying table.

    Args:
        DF_RAW: Source data. The code reads the columns ``Year``,
            ``Country``, ``Market``, ``Company``, the selected metric column
            and the classification columns (``Sector``/``Industry``/
            ``Sub_industry`` or ``Theme``/``Technology``/``Sub_Technology``).

    Returns:
        None. Output is written to the Streamlit page; ``st.stop()`` ends the
        run early when a required column is missing or no rows remain.
    """
    ALL = "전체"

    st.header("규모 변수 Heatmap")

    # NOTE(review): the original indentation was not available; all selector
    # widgets are assumed to belong to the sidebar - confirm.
    with st.sidebar:
        year_sel = st.selectbox("기준 연도", ["LTM","LTM-1","LTM-2","LTM-3"], key="year_sel4")
        market_sel = st.selectbox("상장시장", ["한국 전체","KOSPI","KOSDAQ","미국 전체","NASDAQ","일본 전체","Prime (Domestic Stocks)","Standard (Domestic Stocks)","Prime (Foreign Stocks)"], key="market_sel4")

        # "<country> 전체" filters by country only; a specific market filters
        # by both its country and the market itself.
        market_filter = None
        if ALL in market_sel:
            country_filter = market_sel.split()[0]
        else:
            if market_sel in ("KOSPI", "KOSDAQ"):
                country_filter = "한국"
            elif market_sel == "NASDAQ":
                country_filter = "미국"
            else:
                country_filter = "일본"
            market_filter = market_sel

        class_type = st.radio("분류 체계", ["EMSEC","EMTEC"], horizontal=True, key="class_type4")
        # Both classification schemes are three-level hierarchies; only the
        # column names and widget keys differ.
        if class_type == "EMSEC":
            top_col, mid_col, leaf_col = "Sector", "Industry", "Sub_industry"
            top_key, mid_key = "sector_sel4", "industry_sel4"
        else:
            top_col, mid_col, leaf_col = "Theme", "Technology", "Sub_Technology"
            top_key, mid_key = "theme_sel4", "tech_sel4"

        top_values = sorted(
            v for v in DF_RAW[top_col].dropna().unique() if v != 'Unclassified'
        )
        top_sel = st.selectbox(top_col, [ALL] + top_values, key=top_key)
        if top_sel != ALL:
            mid_values = sorted(
                DF_RAW.loc[DF_RAW[top_col] == top_sel, mid_col].dropna().unique()
            )
            mid_sel = st.selectbox(mid_col, [ALL] + mid_values, key=mid_key)
        else:
            mid_sel = ALL

        # Heatmap rows are one level below the deepest level that is pinned.
        if top_sel == ALL:
            row_level = top_col
        elif mid_sel == ALL:
            row_level = mid_col
        else:
            row_level = leaf_col

        metric_base = {"시가총액": "Market Cap (2024-12-31)_USD", "자산총계": "Assets", "매출액": "Sales"}
        metric_name = st.selectbox("계측값", list(metric_base.keys()), key="metric_name4")
        metric_col = metric_base[metric_name]

    # ---- Row filtering -------------------------------------------------
    DF = DF_RAW[DF_RAW.Year == year_sel]
    if country_filter:
        DF = DF[DF.Country == country_filter]
    if market_filter:
        DF = DF[DF.Market == market_filter]
    if top_sel != ALL:
        DF = DF[DF[top_col] == top_sel]
    if mid_sel != ALL:
        DF = DF[DF[mid_col] == mid_sel]

    if metric_col not in DF.columns:
        st.error(f"'{metric_name}' 열이 없습니다. 데이터 파일을 확인해주세요.")
        st.stop()
    # Same guard for the grouping column, so a missing hierarchy column gives
    # a readable message instead of a KeyError from groupby.
    if row_level not in DF.columns:
        st.error(f"'{row_level}' 열이 없습니다. 데이터 파일을 확인해주세요.")
        st.stop()

    # ---- Metric preparation --------------------------------------------
    # assign() returns a new frame, so nothing is written into a filtered view.
    DF = DF.assign(metric_bil=pd.to_numeric(DF[metric_col], errors="coerce") / 1e9)
    scaled = DF["metric_bil"]
    # Keep finite, non-negative values only; an infinite value would
    # otherwise survive the quantile trim and make the bin edges unbounded.
    DF = DF[scaled.notna() & ~scaled.isin([np.inf, -np.inf]) & (scaled >= 0)]
    if not DF.empty:
        # Trim extreme values above the 99.9th percentile.
        # NOTE(review): with pandas' default linear interpolation this removes
        # the largest value whenever it is unique and there are at least two
        # rows - confirm this is intended for small selections.
        max_th = DF["metric_bil"].quantile(0.999)
        DF = DF[DF["metric_bil"] <= max_th]
    if DF.empty:
        st.warning("조건에 맞는 데이터가 없습니다.")
        st.stop()
    DF = DF.copy()

    countries = DF['Country'].unique()
    country = countries[0] if len(countries) == 1 else 'Unclassified'
    if metric_col.endswith("_USD"):
        # The column name states the currency; do not relabel it by country.
        currency = 'USD'
    else:
        currency = {'한국': 'KRW', '미국': 'USD', '일본': 'JPY', 'Unclassified': 'USD'}.get(country, 'USD')

    # ---- Binning and aggregation ---------------------------------------
    bin_edges = make_edges(DF["metric_bil"].max())
    bin_labels = ["0~"] + [f"{int(e):,}~" for e in bin_edges[1:-1]]
    DF["metric_bin"] = pd.cut(DF["metric_bil"], bins=bin_edges, labels=bin_labels, right=False)

    # observed=True skips empty category combinations; the reindex below puts
    # every bin column back (filled with 0) in label order.
    company_counts = DF.groupby([row_level, "metric_bin"], observed=True)["Company"].nunique()
    pivot = (
        company_counts.unstack(fill_value=0)
        .reindex(columns=bin_labels, fill_value=0)
        .astype(int)
    )
    if pivot.empty:
        st.warning("조건에 맞는 데이터가 없어 집계표를 생성할 수 없습니다.")
        st.stop()

    subtotal = pd.DataFrame(pivot.sum()).T
    subtotal.index = ["Subtotal"]
    pivot_full = pd.concat([subtotal, pivot])

    # ---- Figure ----------------------------------------------------------
    rows = pivot_full.index.tolist()
    cols = pivot_full.columns.tolist()
    z_data = pivot_full.values
    z_max = np.nanmax(z_data)

    fig = go.Figure()
    fig.add_trace(go.Heatmap(
        z=z_data,
        x=cols,
        y=rows,
        colorscale="Greens",
        colorbar=dict(title="기업 수"),
        hovertemplate="%{y}/%{x}<br>기업수: %{z:,}<extra></extra>",
        xgap=1,
        ygap=1
    ))

    # Label every non-zero cell; white text on the darker half of the scale.
    dark_cutoff = z_max * 0.5
    annotations = []
    for (i, j), val in np.ndenumerate(z_data):
        if val > 0:
            annotations.append(
                dict(
                    x=cols[j],
                    y=rows[i],
                    text=f"{int(val):,}",
                    showarrow=False,
                    font=dict(color="white" if val > dark_cutoff else "black", size=12)
                )
            )

    fig.update_layout(
        annotations=annotations,
        height=max(600, 40*len(rows)),
        title=f"{metric_name} 분포 ({row_level}, {currency} billion)",
        xaxis_title="규모 범위",
        yaxis_title=row_level,
        xaxis=dict(side="top"),
        yaxis=dict(autorange="reversed")
    )
    st.plotly_chart(fig, use_container_width=True)
    st.caption(f"기업 수: {DF['Company'].nunique():,} | 통화: {currency}")
    with st.expander("📋 원본 집계표 보기", False):
        st.dataframe(pivot_full, use_container_width=True)
|