# File size: 6,443 Bytes
# bd24fca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from typing import List


def _make_bin_edges(max_val: float) -> List[float]:
    """Build ascending bin edges 0, 10, 30, 60, 100, 300, 600, 1000, ... plus +inf.

    Every edge of that 1-3-6 ladder that does not exceed ``max_val`` is kept,
    and the list is closed with ``np.inf`` so the last bin is open-ended.
    A non-positive or non-finite ``max_val`` yields ``[0, 10, inf]``.
    """
    # No ladder edge ever compares greater than inf/nan, so without this
    # guard the loop below would never terminate for a non-finite maximum.
    if not np.isfinite(max_val) or max_val <= 0:
        return [0, 10, np.inf]
    edges = [0]
    scale = 10
    while True:
        for mantissa in (1, 3, 6):
            edge = mantissa * scale
            if edge > max_val:
                return edges + [np.inf]
            edges.append(edge)
        scale *= 10


def _build_heatmap_figure(pivot_full, title, y_title):
    """Create the annotated Plotly heatmap for a count table (rows x size bins)."""
    rows = pivot_full.index.tolist()
    cols = pivot_full.columns.tolist()
    z_data = pivot_full.values
    z_max = np.nanmax(z_data)

    fig = go.Figure()
    fig.add_trace(go.Heatmap(
        z=z_data,
        x=cols,
        y=rows,
        colorscale="Greens",
        colorbar=dict(title="기업 수"),
        hovertemplate="%{y}/%{x}<br>기업수: %{z:,}<extra></extra>",
        xgap=1,
        ygap=1
    ))
    # Only non-zero cells are labelled; the text turns white on cells whose
    # count is above half of the maximum so it stays readable on dark green.
    annotations = [
        dict(
            x=col,
            y=row,
            text=f"{int(val):,}",
            showarrow=False,
            font=dict(color="white" if val > z_max * 0.5 else "black", size=12)
        )
        for row, row_vals in zip(rows, z_data)
        for col, val in zip(cols, row_vals)
        if val > 0
    ]
    fig.update_layout(
        annotations=annotations,
        height=max(600, 40*len(rows)),
        title=title,
        xaxis_title="규모 범위",
        yaxis_title=y_title,
        xaxis=dict(side="top"),
        yaxis=dict(autorange="reversed")
    )
    return fig


def show_scale_heatmap(DF_RAW):
    """Render the Streamlit page showing company counts per class row and size bin.

    Sidebar widgets choose the year, market, classification scheme (EMSEC or
    EMTEC, each with two drill-down levels) and the size metric. The data is
    filtered accordingly, the metric is divided by 1e9 and binned on a
    0/10/30/60/100/... ladder, and distinct ``Company`` values are counted per
    (row level, bin). The result is drawn as a heatmap with a "Subtotal" row
    on top, followed by a caption and an expandable table.

    Args:
        DF_RAW: pandas DataFrame. The code reads the columns ``Year``,
            ``Country``, ``Market``, ``Company``, the classification columns
            (``Sector``/``Industry``/``Sub_industry`` or
            ``Theme``/``Technology``/``Sub_Technology``) and the selected
            metric column. It is not modified.

    Returns:
        None. Output is written to the Streamlit page; ``st.stop()`` ends the
        run early when the metric column is missing or no rows remain.
    """
    st.header("규모 변수 Heatmap")

    with st.sidebar:
        year_sel = st.selectbox("기준 연도", ["LTM","LTM-1","LTM-2","LTM-3"], key="year_sel4")
        market_sel = st.selectbox("상장시장", ["한국 전체","KOSPI","KOSDAQ","미국 전체","NASDAQ","일본 전체","Prime (Domestic Stocks)","Standard (Domestic Stocks)","Prime (Foreign Stocks)"], key="market_sel4")
        market_filter = None
        if "전체" in market_sel:
            # "<country> 전체": filter on the country only.
            country_filter = market_sel.split()[0]
        else:
            # A specific market: derive its country and filter on both.
            if market_sel in ("KOSPI","KOSDAQ"):
                country_filter = "한국"
            elif market_sel == "NASDAQ":
                country_filter = "미국"
            else:
                country_filter = "일본"
            market_filter = market_sel
        class_type = st.radio("분류 체계", ["EMSEC","EMTEC"], horizontal=True, key="class_type4")
        if class_type == "EMSEC":
            sectors = sorted([s for s in DF_RAW.Sector.dropna().unique() if s != 'Unclassified'])
            sector_sel = st.selectbox("Sector", ["전체"] + sectors, key="sector_sel4")
            if sector_sel != "전체":
                indus = sorted(DF_RAW.loc[DF_RAW.Sector == sector_sel, "Industry"].dropna().unique())
                industry_sel = st.selectbox("Industry", ["전체"] + indus, key="industry_sel4")
            else:
                industry_sel = "전체"
            # Rows are one level below the deepest level the user pinned.
            row_level = "Sector" if sector_sel == "전체" else "Industry" if industry_sel == "전체" else "Sub_industry"
        else:
            themes = sorted([t for t in DF_RAW.Theme.dropna().unique() if t != 'Unclassified'])
            theme_sel = st.selectbox("Theme", ["전체"] + themes, key="theme_sel4")
            if theme_sel != "전체":
                techs = sorted(DF_RAW.loc[DF_RAW.Theme == theme_sel, "Technology"].dropna().unique())
                tech_sel = st.selectbox("Technology", ["전체"] + techs, key="tech_sel4")
            else:
                tech_sel = "전체"
            row_level = "Theme" if theme_sel == "전체" else "Technology" if tech_sel == "전체" else "Sub_Technology"
        metric_base = {"시가총액": "Market Cap (2024-12-31)_USD", "자산총계": "Assets", "매출액": "Sales"}
        metric_name = st.selectbox("계측값", list(metric_base.keys()), key="metric_name4")
        metric_col = metric_base[metric_name]

    # Boolean indexing returns new frames, so DF_RAW itself is never mutated.
    DF = DF_RAW[DF_RAW.Year == year_sel]
    if country_filter:
        DF = DF[DF.Country == country_filter]
    if market_filter:
        DF = DF[DF.Market == market_filter]
    if class_type == "EMSEC":
        if sector_sel != "전체":
            DF = DF[DF.Sector == sector_sel]
        if industry_sel != "전체":
            DF = DF[DF.Industry == industry_sel]
    else:
        if theme_sel != "전체":
            DF = DF[DF.Theme == theme_sel]
        if tech_sel != "전체":
            DF = DF[DF.Technology == tech_sel]
    if metric_col not in DF.columns:
        st.error(f"'{metric_name}' 열이 없습니다. 데이터 파일을 확인해주세요.")
        st.stop()

    # assign() builds a fresh frame, which avoids writing a new column onto a
    # filtered slice (SettingWithCopyWarning).
    DF = DF.assign(metric_bil=DF[metric_col] / 1e9)
    DF = DF[DF["metric_bil"].notna()]
    # Keep non-negative, finite values only; +inf is not NA for pandas and
    # would make both the quantile cut and the bin ladder meaningless.
    DF = DF[(DF["metric_bil"] >= 0) & (DF["metric_bil"] < np.inf)]
    if not DF.empty:
        # Trim extreme outliers above the 99.9th percentile.
        # NOTE(review): with the default linear interpolation the largest
        # value of a small sample lies above this threshold and is dropped
        # unless it is tied - confirm that is intended for narrow selections.
        max_th = DF["metric_bil"].quantile(0.999)
        DF = DF[DF["metric_bil"] <= max_th]
    if DF.empty:
        st.warning("조건에 맞는 데이터가 없습니다.")
        st.stop()

    countries = DF['Country'].unique()
    country = countries[0] if len(countries) == 1 else 'Unclassified'
    currency = {'한국': 'KRW', '미국': 'USD', '일본': 'JPY', 'Unclassified': 'USD'}.get(country, 'USD')
    if metric_col.endswith("_USD"):
        # The column name itself states the unit, so the label must not
        # follow the country's local currency for this metric.
        currency = 'USD'

    bin_edges = _make_bin_edges(DF["metric_bil"].max())
    bin_labels = ["0~"] + [f"{int(e):,}~" for e in bin_edges[1:-1]]
    # right=False makes each bin [lower, upper), matching the "N~" labels.
    DF = DF.assign(
        metric_bin=pd.cut(DF["metric_bil"], bins=bin_edges, labels=bin_labels, right=False)
    )

    # observed is passed explicitly because metric_bin is categorical and the
    # implicit default is deprecated; the reindex restores bins with no rows.
    pivot = (
        DF.groupby([row_level, "metric_bin"], observed=True)["Company"]
        .nunique()
        .unstack(fill_value=0)
        .reindex(columns=bin_labels, fill_value=0)
        .astype(int)
    )
    if pivot.empty:
        st.warning("조건에 맞는 데이터가 없어 집계표를 생성할 수 없습니다.")
        st.stop()
    subtotal = pd.DataFrame(pivot.sum()).T
    subtotal.index = ["Subtotal"]
    pivot_full = pd.concat([subtotal, pivot])

    fig = _build_heatmap_figure(
        pivot_full,
        title=f"{metric_name} 분포 ({row_level}, {currency} billion)",
        y_title=row_level,
    )
    st.plotly_chart(fig, use_container_width=True)
    st.caption(f"기업 수: {DF['Company'].nunique():,} | 통화: {currency}")
    with st.expander("📋 원본 집계표 보기", False):
        st.dataframe(pivot_full, use_container_width=True)