Rajveer Pall committed on
Commit
62f4371
·
verified ·
1 Parent(s): ca4fe7d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +1012 -0
app.py ADDED
@@ -0,0 +1,1012 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FinSight Dashboard — LLM-Powered Earnings Intelligence
3
+ Stage 5: Production Streamlit Dashboard
4
+
5
+ Pages:
6
+ 1. Overview — project summary, pipeline, key stats
7
+ 2. Model Performance — walk-forward IC/hit-rate/AUC comparison, year by year
8
+ 3. Feature Importance — interactive SHAP feature importance
9
+ 4. Backtest Results — equity curve, drawdown, quarterly P&L
10
+ 5. Transcript Explorer — browse transcripts with live sentiment
11
+
12
+ Run:
13
+ streamlit run src/dashboard/app.py
14
+ """
15
+
16
+ import sys
17
+ from pathlib import Path
18
+ import warnings
19
+ warnings.filterwarnings("ignore")
20
+
21
+ import numpy as np
22
+ import pandas as pd
23
+ import plotly.express as px
24
+ import plotly.graph_objects as go
25
+ from plotly.subplots import make_subplots
26
+ import streamlit as st
27
+
28
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
29
+ from config import PROCESSED_DIR, EXPERIMENTS_DIR
30
+
31
+ # ── Page config ────────────────────────────────────────────────────────────────
32
+
33
# Browser-tab title and icon, wide page layout, and a sidebar that starts
# expanded (the sidebar hosts the page navigation radio further down).
st.set_page_config(
    page_title="FinSight | Earnings Intelligence",
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded",
)
39
+
40
+ # ── Global CSS ─────────────────────────────────────────────────────────────────
41
+
42
+ st.markdown("""
43
+ <style>
44
+ [data-testid="stAppViewContainer"] { background: #0a0e1a; color: #e8eaf6; }
45
+ [data-testid="stSidebar"] {
46
+ background: #0d1117;
47
+ border-right: 1px solid #1e2433;
48
+ }
49
+ [data-testid="stSidebar"] .stRadio label {
50
+ color: #8892b0 !important;
51
+ font-size: 0.9rem;
52
+ }
53
+ .metric-card {
54
+ background: linear-gradient(135deg, #0d1117 0%, #161b27 100%);
55
+ border: 1px solid #1e2d4a;
56
+ border-radius: 12px;
57
+ padding: 20px 24px;
58
+ text-align: center;
59
+ transition: border-color 0.2s;
60
+ }
61
+ .metric-card:hover { border-color: #3d5a99; }
62
+ .metric-value { font-size: 2rem; font-weight: 700; color: #64b5f6; line-height: 1.1; }
63
+ .metric-label {
64
+ font-size: 0.78rem; color: #8892b0;
65
+ text-transform: uppercase; letter-spacing: 1px; margin-top: 6px;
66
+ }
67
+ .metric-delta { font-size: 0.82rem; margin-top: 4px; }
68
+ .delta-pos { color: #66bb6a; }
69
+ .delta-neg { color: #ef5350; }
70
+ .delta-neu { color: #8892b0; }
71
+ .section-header {
72
+ font-size: 1.4rem; font-weight: 700; color: #e8eaf6;
73
+ border-left: 4px solid #3d5a99; padding-left: 12px;
74
+ margin: 28px 0 16px 0;
75
+ }
76
+ .subsection { font-size: 1rem; font-weight: 600; color: #8892b0; margin: 16px 0 8px 0; }
77
+ .hero-title {
78
+ font-size: 2.8rem; font-weight: 800;
79
+ background: linear-gradient(90deg, #64b5f6, #7c4dff, #64b5f6);
80
+ background-size: 200%;
81
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent;
82
+ line-height: 1.2;
83
+ }
84
+ .hero-sub {
85
+ font-size: 1.1rem; color: #8892b0; margin-top: 8px;
86
+ max-width: 680px; line-height: 1.6;
87
+ }
88
+ .pipeline-step {
89
+ background: #0d1117; border: 1px solid #1e2433;
90
+ border-radius: 10px; padding: 14px 16px; text-align: center;
91
+ }
92
+ .pipeline-icon { font-size: 1.6rem; }
93
+ .pipeline-label { font-size: 0.78rem; color: #8892b0; margin-top: 4px; }
94
+ .pipeline-title { font-size: 0.9rem; font-weight: 600; color: #cfd8dc; }
95
+ .insight-box {
96
+ background: #0d1117; border-left: 3px solid #3d5a99;
97
+ border-radius: 0 8px 8px 0; padding: 12px 16px; margin: 8px 0;
98
+ font-size: 0.88rem; color: #b0bec5; line-height: 1.6;
99
+ }
100
+ .insight-box strong { color: #64b5f6; }
101
+ .badge {
102
+ display: inline-block; padding: 2px 10px; border-radius: 20px;
103
+ font-size: 0.72rem; font-weight: 600; margin: 2px;
104
+ }
105
+ .badge-blue { background: #1a237e22; color: #64b5f6; border: 1px solid #1a237e; }
106
+ .badge-green { background: #1b5e2022; color: #66bb6a; border: 1px solid #1b5e20; }
107
+ .badge-red { background: #b71c1c22; color: #ef9a9a; border: 1px solid #b71c1c; }
108
+ hr { border-color: #1e2433 !important; }
109
+ ::-webkit-scrollbar { width: 6px; }
110
+ ::-webkit-scrollbar-track { background: #0a0e1a; }
111
+ ::-webkit-scrollbar-thumb { background: #1e2d4a; border-radius: 3px; }
112
+ </style>
113
+ """, unsafe_allow_html=True)
114
+
115
+
116
+ # ── Layout helper — avoids duplicate xaxis/yaxis conflicts ────────────────────
117
+
118
# Dark-theme defaults shared by every Plotly figure in the dashboard.
BASE_LAYOUT = dict(
    paper_bgcolor="#0d1117",
    plot_bgcolor="#0a0e1a",
    font=dict(color="#b0bec5", family="Inter, sans-serif"),
    margin=dict(l=50, r=30, t=50, b=50),
    colorway=["#64b5f6", "#66bb6a", "#ffa726", "#ef5350", "#ab47bc", "#26c6da"],
)
# Axis styling lives outside BASE_LAYOUT so L() can merge per-chart axis
# overrides into it instead of replacing it wholesale.
BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")


def L(**kwargs):
    """
    Merge base dark-theme layout with chart-specific overrides.

    ``xaxis`` / ``yaxis`` overrides are merged key-by-key into the base axis
    styling instead of replacing it, which avoids the 'multiple values for
    keyword argument xaxis' TypeError. Every other keyword replaces the
    matching top-level key of ``BASE_LAYOUT`` (or is added if absent).

    Returns:
        A new dict suitable for ``fig.update_layout(**L(...))``. The result
        never shares mutable objects with the module-level defaults, so
        mutating it cannot affect later charts.
    """
    # Function-scope import keeps this helper self-contained.
    import copy

    # Deep copy: BASE_LAYOUT nests dicts (font, margin) and a list (colorway)
    # that a shallow copy would hand out by reference.
    out = copy.deepcopy(BASE_LAYOUT)
    # Always build a fresh axis dict, even when no override is supplied, so
    # the shared BASE_XAXIS / BASE_YAXIS objects are never returned directly.
    out["xaxis"] = {**BASE_XAXIS, **kwargs.pop("xaxis", {})}
    out["yaxis"] = {**BASE_YAXIS, **kwargs.pop("yaxis", {})}
    out.update(kwargs)
    return out
145
+
146
+
147
+ # ── Global helpers ─────────────────────────────────────────────────────────────
148
+
149
def metric_card(col, value, label, delta="", delta_type="neu"):
    """Write a KPI card into ``col`` as raw HTML.

    The card shows ``value`` as the headline, ``label`` underneath, and
    ``delta`` as a third line. ``delta_type`` is interpolated into the CSS
    class name ``delta-<delta_type>`` on that third line. All arguments are
    inserted into the markup as-is (no HTML escaping).
    """
    card_html = f"""
    <div class='metric-card'>
    <div class='metric-value'>{value}</div>
    <div class='metric-label'>{label}</div>
    <div class='metric-delta delta-{delta_type}'>{delta}</div>
    </div>"""
    col.markdown(card_html, unsafe_allow_html=True)
157
+
158
+
159
+ # ── Data loaders ───────────────────────────────────────────────────────────────
160
+
161
@st.cache_data(show_spinner=False)
def load_feature_matrix():
    """Read ``feature_matrix.parquet`` from ``PROCESSED_DIR``.

    Returns an empty ``DataFrame`` when the file does not exist. The result
    is cached by ``st.cache_data`` with the loading spinner disabled.
    """
    parquet_path = PROCESSED_DIR / "feature_matrix.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
165
+
166
@st.cache_data(show_spinner=False)
def load_model_results():
    """Read ``model_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns an empty ``DataFrame`` when the file does not exist. The result
    is cached by ``st.cache_data`` with the loading spinner disabled.
    """
    csv_path = EXPERIMENTS_DIR / "model_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
170
+
171
@st.cache_data(show_spinner=False)
def load_backtest():
    """Read ``backtest_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns an empty ``DataFrame`` when the file does not exist. The result
    is cached by ``st.cache_data`` with the loading spinner disabled.
    """
    csv_path = EXPERIMENTS_DIR / "backtest_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
175
+
176
@st.cache_data(show_spinner=False)
def load_shap():
    """Read ``shap_values.parquet`` from ``EXPERIMENTS_DIR``.

    Returns an empty ``DataFrame`` when the file does not exist. The result
    is cached by ``st.cache_data`` with the loading spinner disabled.
    """
    parquet_path = EXPERIMENTS_DIR / "shap_values.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
180
+
181
+
182
+ # ── Sidebar ────────────────────────────────────────────────────────────────────
183
+
184
+ with st.sidebar:
185
+ st.markdown("""
186
+ <div style='padding:12px 0 20px 0;'>
187
+ <div style='font-size:1.5rem;font-weight:800;color:#64b5f6;'>📈 FinSight</div>
188
+ <div style='font-size:0.75rem;color:#8892b0;margin-top:4px;'>
189
+ LLM-Powered Earnings Intelligence
190
+ </div>
191
+ </div>
192
+ """, unsafe_allow_html=True)
193
+
194
+ page = st.radio(
195
+ "Navigation",
196
+ ["🏠 Overview",
197
+ "📊 Model Performance",
198
+ "🔍 Feature Importance",
199
+ "💹 Backtest Results",
200
+ "🔎 Transcript Explorer"],
201
+ label_visibility="collapsed",
202
+ )
203
+
204
+ st.markdown("<hr>", unsafe_allow_html=True)
205
+ st.markdown("""
206
+ <div style='font-size:0.72rem;color:#8892b0;line-height:1.8;'>
207
+ <b style='color:#cfd8dc;'>Stack</b><br>
208
+ FinBERT · ChromaDB · XGBoost<br>
209
+ LightGBM · SHAP · Streamlit<br><br>
210
+ <b style='color:#cfd8dc;'>Data</b><br>
211
+ 14,584 earnings transcripts<br>
212
+ 601 S&amp;P 500 companies<br>
213
+ 2018 – 2024<br><br>
214
+ <b style='color:#cfd8dc;'>Author</b><br>
215
+ Rajveer Singh Pall
216
+ </div>
217
+ """, unsafe_allow_html=True)
218
+
219
+
220
+ # ═══════════════════════════════════════════════════════════════════════════════
221
+ # PAGE 1 — OVERVIEW
222
+ # ═══════════════════════════════════════════════════════════════════════════════
223
+
224
+ if page == "🏠 Overview":
225
+ fm = load_feature_matrix()
226
+ mr = load_model_results()
227
+
228
+ st.markdown("""
229
+ <div style='padding:24px 0 8px 0;'>
230
+ <div class='hero-title'>FinSight</div>
231
+ <div class='hero-title' style='font-size:1.8rem;color:#7c4dff;'>
232
+ Earnings Intelligence System
233
+ </div>
234
+ <div class='hero-sub'>
235
+ An end-to-end machine learning pipeline that extracts alpha signals
236
+ from S&amp;P 500 earnings call transcripts using FinBERT sentiment analysis,
237
+ RAG-based structured feature extraction, and walk-forward validated
238
+ gradient boosting models.
239
+ </div>
240
+ </div>
241
+ """, unsafe_allow_html=True)
242
+
243
+ st.markdown("<hr>", unsafe_allow_html=True)
244
+
245
+ best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
246
+ best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
247
+ best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
248
+ n_rows = len(fm) if not fm.empty else 13442
249
+
250
+ c1,c2,c3,c4,c5 = st.columns(5)
251
+ metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
252
+ metric_card(c2, f"{n_rows:,}", "Training Samples", "2018–2024", "neu")
253
+ metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
254
+ metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
255
+ metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
256
+
257
+ st.markdown("<br>", unsafe_allow_html=True)
258
+
259
+ # Pipeline
260
+ st.markdown("<div class='section-header'>System Architecture</div>",
261
+ unsafe_allow_html=True)
262
+ steps = [
263
+ ("🗄️","Stage 1","Data Ingestion", "SEC EDGAR · yfinance\n14,584 transcripts"),
264
+ ("🧠","Stage 2","NLP Pipeline", "FinBERT · ChromaDB RAG\n34 features"),
265
+ ("🤖","Stage 3","ML Models", "XGBoost · LightGBM\nWalk-forward CV"),
266
+ ("📉","Stage 4","Backtesting", "Long-short strategy\n10bps TC"),
267
+ ("🖥️","Stage 5","Dashboard", "Streamlit · Plotly\nHugging Face Spaces"),
268
+ ]
269
+ cols = st.columns(len(steps))
270
+ for col, (icon, stage, title, desc) in zip(cols, steps):
271
+ col.markdown(f"""
272
+ <div class='pipeline-step'>
273
+ <div class='pipeline-icon'>{icon}</div>
274
+ <div class='pipeline-label'>{stage}</div>
275
+ <div class='pipeline-title'>{title}</div>
276
+ <div style='font-size:0.72rem;color:#546e7a;margin-top:4px;line-height:1.5;'>
277
+ {desc}</div>
278
+ </div>""", unsafe_allow_html=True)
279
+
280
+ st.markdown("<br>", unsafe_allow_html=True)
281
+
282
+ left, right = st.columns([1.1, 1])
283
+
284
+ with left:
285
+ st.markdown("<div class='section-header'>Key Findings</div>",
286
+ unsafe_allow_html=True)
287
+ for f in [
288
+ "<strong>Analyst negativity &gt; management positivity.</strong> "
289
+ "qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
290
+ "Analyst pushback in Q&amp;A contains more information than prepared remarks.",
291
+
292
+ "<strong>NLP reduces prediction variance by 87%.</strong> "
293
+ "Baseline IC std=0.114 vs LightGBM std=0.009 — "
294
+ "far more consistent across years.",
295
+
296
+ "<strong>Consistent with weak-form EMH.</strong> "
297
+ "Positive IC (0.0198) exists but cannot overcome 10bps transaction "
298
+ "costs at a 5-day holding period.",
299
+
300
+ "<strong>RAG guidance relevance is top-5.</strong> "
301
+ "Semantic relevance of the guidance section — not just its content — "
302
+ "carries significant predictive signal.",
303
+ ]:
304
+ st.markdown(f"<div class='insight-box'>{f}</div>",
305
+ unsafe_allow_html=True)
306
+
307
+ with right:
308
+ st.markdown("<div class='section-header'>Dataset Coverage</div>",
309
+ unsafe_allow_html=True)
310
+ if not fm.empty:
311
+ yr = fm.groupby("year").size().reset_index(name="count")
312
+ fig = go.Figure(go.Bar(
313
+ x=yr["year"].astype(str),
314
+ y=yr["count"],
315
+ marker=dict(color=yr["count"],
316
+ colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
317
+ showscale=False),
318
+ text=yr["count"], textposition="outside",
319
+ textfont=dict(size=11),
320
+ ))
321
+ fig.update_layout(**L(title="Transcript Count by Year", height=300,
322
+ showlegend=False,
323
+ xaxis=dict(title="Year"),
324
+ yaxis=dict(title="Transcripts")))
325
+ st.plotly_chart(fig, use_container_width=True)
326
+
327
+ # Sentiment heatmap
328
+ if not fm.empty and "mgmt_net_sentiment" in fm.columns:
329
+ st.markdown("<div class='section-header'>Sentiment Landscape</div>",
330
+ unsafe_allow_html=True)
331
+ heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
332
+ .mean().reset_index())
333
+ top_t = fm["ticker"].value_counts().head(30).index
334
+ heat = heat[heat["ticker"].isin(top_t)]
335
+ pivot = heat.pivot(index="ticker", columns="year",
336
+ values="mgmt_net_sentiment")
337
+ fig2 = go.Figure(go.Heatmap(
338
+ z=pivot.values,
339
+ x=[str(c) for c in pivot.columns],
340
+ y=pivot.index,
341
+ colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
342
+ [0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
343
+ zmid=0,
344
+ colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
345
+ hovertemplate="Ticker: %{y}<br>Year: %{x}<br>Sentiment: %{z:.3f}<extra></extra>",
346
+ ))
347
+ fig2.update_layout(**L(
348
+ title="Management Net Sentiment — Top 30 Tickers × Year",
349
+ height=500,
350
+ xaxis=dict(title="Year"),
351
+ yaxis=dict(title=""),
352
+ ))
353
+ st.plotly_chart(fig2, use_container_width=True)
354
+
355
+
356
+ # ═══════════════════════════════════════════════════════════════════════════════
357
+ # PAGE 2 — MODEL PERFORMANCE
358
+ # ═══════════════════════════════════════════════════════════════════════════════
359
+
360
+ elif page == "📊 Model Performance":
361
+ mr = load_model_results()
362
+
363
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Model Performance</div>",
364
+ unsafe_allow_html=True)
365
+ st.markdown("<div class='hero-sub'>Walk-forward validation (2021–2024). "
366
+ "Train on 3 prior years, test on held-out year. Zero data leakage.</div>",
367
+ unsafe_allow_html=True)
368
+ st.markdown("<hr>", unsafe_allow_html=True)
369
+
370
+ if mr.empty:
371
+ st.error("model_results.csv not found. Run Stage 3 first.")
372
+ st.stop()
373
+
374
+ summary = (
375
+ mr.groupby("model")[["ic","hit_rate","auc"]]
376
+ .agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
377
+ .round(4)
378
+ )
379
+ summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
380
+ "AUC Mean","AUC Std"]
381
+ summary = summary.sort_values("IC Mean", ascending=False)
382
+
383
+ st.markdown("<div class='section-header'>Model Comparison</div>",
384
+ unsafe_allow_html=True)
385
+
386
def color_ic(val):
    """Return the CSS declaration used to colour one IC cell.

    Floats above 0.015 are styled bold green, floats below 0 red. Any other
    value (including non-float types such as ``int`` or ``str``) gets an
    empty string, i.e. no styling.
    """
    if not isinstance(val, float):
        return ""
    if val > 0.015:
        return "color: #66bb6a; font-weight:600"
    return "color: #ef5350" if val < 0 else ""
391
+
392
+ st.dataframe(
393
+ summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
394
+ use_container_width=True, height=220,
395
+ )
396
+
397
+ st.markdown("<br>", unsafe_allow_html=True)
398
+ st.markdown("<div class='section-header'>Information Coefficient by Year</div>",
399
+ unsafe_allow_html=True)
400
+
401
+ MODEL_COLORS = {
402
+ "Baseline": "#ffa726",
403
+ "FinBERT_only": "#26c6da",
404
+ "RAG_only": "#ab47bc",
405
+ "XGBoost_all": "#ef5350",
406
+ "LightGBM_all": "#66bb6a",
407
+ }
408
+
409
+ fig = go.Figure()
410
+ for m in mr["model"].unique():
411
+ sub = mr[mr["model"]==m].sort_values("test_year")
412
+ fig.add_trace(go.Scatter(
413
+ x=sub["test_year"].astype(int),
414
+ y=sub["ic"],
415
+ mode="lines+markers", name=m,
416
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
417
+ marker=dict(size=9),
418
+ hovertemplate=f"<b>{m}</b><br>Year: %{{x}}<br>IC: %{{y:.4f}}<extra></extra>",
419
+ ))
420
+ fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
421
+ fig.update_layout(**L(
422
+ title="Walk-Forward IC — Positive = Predictive",
423
+ height=380,
424
+ xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
425
+ yaxis=dict(title="Information Coefficient"),
426
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
427
+ ))
428
+ st.plotly_chart(fig, use_container_width=True)
429
+
430
+ col1, col2 = st.columns(2)
431
+
432
+ with col1:
433
+ st.markdown("<div class='subsection'>Hit Rate by Year</div>",
434
+ unsafe_allow_html=True)
435
+ fig2 = go.Figure()
436
+ for m in mr["model"].unique():
437
+ sub = mr[mr["model"]==m].sort_values("test_year")
438
+ fig2.add_trace(go.Scatter(
439
+ x=sub["test_year"].astype(int), y=sub["hit_rate"],
440
+ mode="lines+markers", name=m,
441
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
442
+ marker=dict(size=7), showlegend=False,
443
+ ))
444
+ fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
445
+ fig2.update_layout(**L(
446
+ height=300, title="Hit Rate (>0.5 = better than coin flip)",
447
+ xaxis=dict(tickvals=[2021,2022,2023,2024]),
448
+ yaxis=dict(title="Hit Rate"),
449
+ ))
450
+ st.plotly_chart(fig2, use_container_width=True)
451
+
452
+ with col2:
453
+ st.markdown("<div class='subsection'>AUC by Year</div>",
454
+ unsafe_allow_html=True)
455
+ fig3 = go.Figure()
456
+ for m in mr["model"].unique():
457
+ sub = mr[mr["model"]==m].sort_values("test_year")
458
+ fig3.add_trace(go.Scatter(
459
+ x=sub["test_year"].astype(int), y=sub["auc"],
460
+ mode="lines+markers", name=m,
461
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
462
+ marker=dict(size=7), showlegend=False,
463
+ ))
464
+ fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
465
+ fig3.update_layout(**L(
466
+ height=300, title="AUC-ROC (>0.5 = better than random)",
467
+ xaxis=dict(tickvals=[2021,2022,2023,2024]),
468
+ yaxis=dict(title="AUC"),
469
+ ))
470
+ st.plotly_chart(fig3, use_container_width=True)
471
+
472
+ st.markdown("<div class='section-header'>Stability Analysis — IC Variance</div>",
473
+ unsafe_allow_html=True)
474
+ ic_std = mr.groupby("model")["ic"].std().sort_values()
475
+ ic_mean = mr.groupby("model")["ic"].mean()
476
+ bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
477
+
478
+ fig4 = go.Figure(go.Bar(
479
+ y=ic_std.index, x=ic_std.values, orientation="h",
480
+ marker_color=bar_colors,
481
+ text=[f"σ={v:.4f}" for v in ic_std.values],
482
+ textposition="outside", textfont=dict(size=11),
483
+ ))
484
+ fig4.update_layout(**L(
485
+ title="IC Standard Deviation — Lower = More Consistent",
486
+ height=280,
487
+ xaxis=dict(title="IC Std Dev"),
488
+ yaxis=dict(title=""),
489
+ ))
490
+ st.plotly_chart(fig4, use_container_width=True)
491
+
492
+ st.markdown("""
493
+ <div class='insight-box'>
494
+ <strong>Interpretation:</strong> The Baseline's high IC mean (0.043) is
495
+ misleading — its std of 0.114 shows extreme instability driven by lucky
496
+ quarters. LightGBM achieves IC=0.020 with std=0.009, making it
497
+ <strong>10× more stable</strong>. In live trading, consistency matters
498
+ far more than occasional lucky peaks.
499
+ </div>
500
+ """, unsafe_allow_html=True)
501
+
502
+
503
+ # ═══════════════════════════════════════════════════════════════════════════════
504
+ # PAGE 3 — FEATURE IMPORTANCE
505
+ # ═══════════════════════════════════════════════════════════════════════════════
506
+
507
+ elif page == "🔍 Feature Importance":
508
+ shap_df = load_shap()
509
+ fm = load_feature_matrix()
510
+
511
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Feature Importance</div>",
512
+ unsafe_allow_html=True)
513
+ st.markdown("<div class='hero-sub'>SHAP values computed on LightGBM (best model). "
514
+ "Shows which features actually drive predictions.</div>",
515
+ unsafe_allow_html=True)
516
+ st.markdown("<hr>", unsafe_allow_html=True)
517
+
518
+ if shap_df.empty:
519
+ st.error("shap_values.parquet not found. Run run_shap.py first.")
520
+ st.stop()
521
+
522
+ mean_shap = shap_df.abs().mean().sort_values(ascending=False)
523
+
524
def feat_color(name):
    """Map a feature name to its bar colour based on the name's prefix.

    ``rag_`` -> blue, ``mgmt_`` -> green, ``qa_`` -> orange; any other name
    gets the purple fallback.
    """
    prefix_palette = (
        ("rag_", "#64b5f6"),
        ("mgmt_", "#66bb6a"),
        ("qa_", "#ffa726"),
    )
    for prefix, hex_code in prefix_palette:
        if name.startswith(prefix):
            return hex_code
    return "#ab47bc"
529
+
530
def feat_group(name):
    """Map a feature name to its display group based on the name's prefix.

    ``rag_`` -> "RAG Features", ``mgmt_`` -> "Management FinBERT",
    ``qa_`` -> "QA FinBERT"; any other name falls into "Other".
    """
    prefix_groups = (
        ("rag_", "RAG Features"),
        ("mgmt_", "Management FinBERT"),
        ("qa_", "QA FinBERT"),
    )
    for prefix, group_label in prefix_groups:
        if name.startswith(prefix):
            return group_label
    return "Other"
535
+
536
+ st.markdown("<div class='section-header'>Top 20 Features by Mean |SHAP|</div>",
537
+ unsafe_allow_html=True)
538
+ top20 = mean_shap.head(20)[::-1]
539
+ fig = go.Figure(go.Bar(
540
+ y=top20.index, x=top20.values, orientation="h",
541
+ marker_color=[feat_color(n) for n in top20.index],
542
+ text=[f"{v:.4f}" for v in top20.values],
543
+ textposition="outside", textfont=dict(size=10),
544
+ hovertemplate="<b>%{y}</b><br>Mean |SHAP|: %{x:.4f}<extra></extra>",
545
+ ))
546
+ fig.update_layout(**L(
547
+ height=520,
548
+ title="Feature Importance — 🔵 RAG | 🟢 Mgmt FinBERT | 🟠 QA FinBERT",
549
+ xaxis=dict(title="Mean |SHAP Value|"),
550
+ yaxis=dict(title=""),
551
+ ))
552
+ st.plotly_chart(fig, use_container_width=True)
553
+
554
+ col1, col2 = st.columns(2)
555
+
556
with col1:
    st.markdown("<div class='section-header'>Importance by Feature Group</div>",
                unsafe_allow_html=True)

    # One row per feature with its mean |SHAP|, then summed per feature group.
    gs = mean_shap.reset_index()
    gs.columns = ["feature", "shap"]
    gs["group"] = gs["feature"].apply(feat_group)
    gt = gs.groupby("group")["shap"].sum()

    # groupby() yields the groups in sorted label order, so a positional
    # colour list pairs slices with the wrong colours (e.g. Management would
    # get the RAG blue). Look each colour up by group name so the donut uses
    # the same palette as feat_color() and the bar-chart legend above.
    group_colors = {
        "RAG Features": "#64b5f6",
        "Management FinBERT": "#66bb6a",
        "QA FinBERT": "#ffa726",
        "Other": "#ab47bc",
    }
    slice_colors = [group_colors.get(g, "#ab47bc") for g in gt.index]

    fig2 = go.Figure(go.Pie(
        labels=gt.index, values=gt.values, hole=0.55,
        marker=dict(colors=slice_colors),
        textinfo="label+percent", textfont=dict(size=12),
        hovertemplate="<b>%{label}</b><br>Total SHAP: %{value:.4f}<br>%{percent}<extra></extra>",
    ))
    fig2.update_layout(**L(
        height=320, showlegend=False,
        # Caption rendered in the centre of the donut hole.
        annotations=[dict(text="SHAP<br>Groups", x=0.5, y=0.5,
                          font_size=13, showarrow=False,
                          font_color="#b0bec5")],
    ))
    st.plotly_chart(fig2, use_container_width=True)
578
+
579
+ with col2:
580
+ st.markdown("<div class='section-header'>SHAP vs Correlation with Target</div>",
581
+ unsafe_allow_html=True)
582
+ if not fm.empty and "target_5d_up" in fm.columns:
583
+ feat_cols = [c for c in shap_df.columns if c in fm.columns]
584
+ corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
585
+ cdf = pd.DataFrame({
586
+ "feature": corrs.index,
587
+ "shap": mean_shap.reindex(corrs.index).fillna(0).values,
588
+ "corr": corrs.values,
589
+ "group": [feat_group(f) for f in corrs.index],
590
+ })
591
+ cmap = {
592
+ "RAG Features": "#64b5f6",
593
+ "Management FinBERT": "#66bb6a",
594
+ "QA FinBERT": "#ffa726",
595
+ "Other": "#ab47bc",
596
+ }
597
+ fig3 = px.scatter(
598
+ cdf, x="corr", y="shap", color="group",
599
+ color_discrete_map=cmap, hover_data=["feature"],
600
+ labels={"corr":"Pearson Corr with Target",
601
+ "shap":"Mean |SHAP Value|"},
602
+ height=320,
603
+ )
604
+ fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
605
+ fig3.update_layout(**L(
606
+ title="SHAP Importance vs Linear Correlation",
607
+ showlegend=False,
608
+ ))
609
+ st.plotly_chart(fig3, use_container_width=True)
610
+
611
+ st.markdown("<div class='section-header'>Feature Insights</div>",
612
+ unsafe_allow_html=True)
613
+ insights = [
614
+ ("🏆 #1 — qa_neg_ratio",
615
+ "Proportion of negative sentences in analyst Q&amp;A. When analysts "
616
+ "push back hard, it signals market-moving information that management "
617
+ "tried to downplay."),
618
+ ("📊 #2 — mgmt_sent_vol",
619
+ "Volatility of management's sentence-level sentiment. Inconsistent "
620
+ "messaging — mixing optimism with caution — often precedes larger "
621
+ "price moves."),
622
+ ("📝 #3 — qa_n_sentences",
623
+ "Length of the Q&amp;A section. Longer Q&amp;A sessions indicate "
624
+ "more analyst scrutiny, which correlates with uncertainty about "
625
+ "the quarter's results."),
626
+ ("😶 #4 — mgmt_mean_neu",
627
+ "Neutral sentiment ratio in management remarks. Deliberately neutral "
628
+ "language can mask very good or very bad news — a hedging signal."),
629
+ ("🎯 #5 — rag_guidance_relevance",
630
+ "Semantic similarity of the guidance section to specific numerical "
631
+ "guidance queries. More relevant guidance sections contain concrete "
632
+ "targets that markets react to more strongly."),
633
+ ]
634
+ cols = st.columns(len(insights))
635
+ for col, (title, body) in zip(cols, insights):
636
+ col.markdown(f"""
637
+ <div class='pipeline-step' style='text-align:left;height:190px;'>
638
+ <div style='font-size:0.82rem;font-weight:700;color:#64b5f6;
639
+ margin-bottom:8px;'>{title}</div>
640
+ <div style='font-size:0.76rem;color:#8892b0;line-height:1.6;'>
641
+ {body}</div>
642
+ </div>""", unsafe_allow_html=True)
643
+
644
+
645
+ # ═══════════════════════════════════════════════════════════════════════════════
646
+ # PAGE 4 — BACKTEST
647
+ # ═══════════════════════════════════════════════════════════════════════════════
648
+
649
+ elif page == "💹 Backtest Results":
650
+ bt = load_backtest()
651
+
652
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Backtest Results</div>",
653
+ unsafe_allow_html=True)
654
+ st.markdown("<div class='hero-sub'>Long-short quartile portfolio. "
655
+ "Long top-25% predicted stocks, short bottom-25%. "
656
+ "5-day holding period. 10bps round-trip transaction cost.</div>",
657
+ unsafe_allow_html=True)
658
+ st.markdown("<hr>", unsafe_allow_html=True)
659
+
660
+ if bt.empty:
661
+ st.error("backtest_results.csv not found. Run Stage 4 first.")
662
+ st.stop()
663
+
664
+ bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
665
+ bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
666
+ rets = bt["net_ret"]
667
+ cum = (1 + rets).cumprod()
668
+ peak = cum.cummax()
669
+ dd = (cum - peak) / peak
670
+
671
+ n_yrs = len(bt) / 4
672
+ ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
673
+ ann_vol = float(rets.std() * np.sqrt(4))
674
+ sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
675
+ max_dd = float(dd.min())
676
+ hit = float((rets > 0).mean())
677
+
678
+ c1,c2,c3,c4,c5 = st.columns(5)
679
+ metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
680
+ "After TC", "pos" if ann_ret > 0 else "neg")
681
+ metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
682
+ ">1.0 = excellent", "pos" if sharpe > 0 else "neg")
683
+ metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
684
+ "Peak-to-trough", "neg")
685
+ metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
686
+ "Profitable quarters", "pos" if hit > 0.5 else "neg")
687
+ metric_card(c5, str(len(bt)), "Quarters Tested",
688
+ "2021–2024", "neu")
689
+
690
+ st.markdown("<br>", unsafe_allow_html=True)
691
+ st.markdown("<div class='section-header'>Equity Curve</div>",
692
+ unsafe_allow_html=True)
693
+
694
+ fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
695
+ shared_xaxes=True, vertical_spacing=0.04)
696
+ fig.add_trace(go.Scatter(
697
+ x=bt["period"], y=cum.values,
698
+ mode="lines+markers",
699
+ line=dict(color="#64b5f6", width=2.5),
700
+ marker=dict(size=7),
701
+ fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
702
+ name="Cumulative Return",
703
+ hovertemplate="<b>%{x}</b><br>Cumulative: %{y:.4f}<extra></extra>",
704
+ ), row=1, col=1)
705
+ fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
706
+ line_width=1, row=1, col=1)
707
+ fig.add_trace(go.Bar(
708
+ x=bt["period"], y=dd.values*100,
709
+ marker_color="#ef5350", opacity=0.7, name="Drawdown %",
710
+ hovertemplate="<b>%{x}</b><br>Drawdown: %{y:.2f}%<extra></extra>",
711
+ ), row=2, col=1)
712
+
713
+ fig.update_layout(
714
+ paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
715
+ font=dict(color="#b0bec5"),
716
+ margin=dict(l=50,r=30,t=50,b=80),
717
+ title="FinSight Long-Short Strategy — 2021 to 2024",
718
+ height=500, showlegend=False,
719
+ xaxis2=dict(tickangle=45, tickfont_size=10,
720
+ gridcolor="#1a2035", linecolor="#1e2433"),
721
+ yaxis=dict(title="Cumulative Return",
722
+ gridcolor="#1a2035", linecolor="#1e2433"),
723
+ yaxis2=dict(title="DD %",
724
+ gridcolor="#1a2035", linecolor="#1e2433"),
725
+ )
726
+ fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
727
+ st.plotly_chart(fig, use_container_width=True)
728
+
729
+ col1, col2 = st.columns(2)
730
+
731
+ with col1:
732
+ st.markdown("<div class='subsection'>Quarterly Net Returns</div>",
733
+ unsafe_allow_html=True)
734
+ q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
735
+ fig2 = go.Figure(go.Bar(
736
+ x=bt["period"], y=rets.values*100,
737
+ marker_color=q_colors,
738
+ text=[f"{v*100:.2f}%" for v in rets.values],
739
+ textposition="outside", textfont=dict(size=9),
740
+ hovertemplate="<b>%{x}</b><br>Net Return: %{y:.2f}%<extra></extra>",
741
+ ))
742
+ fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
743
+ fig2.update_layout(**L(
744
+ height=320,
745
+ title="Net Return per Quarter (after 10bps TC)",
746
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
747
+ yaxis=dict(title="Net Return (%)"),
748
+ ))
749
+ st.plotly_chart(fig2, use_container_width=True)
750
+
751
+ with col2:
752
+ st.markdown("<div class='subsection'>Long vs Short Leg Hit Rate</div>",
753
+ unsafe_allow_html=True)
754
+ fig3 = go.Figure()
755
+ fig3.add_trace(go.Scatter(
756
+ x=bt["period"], y=bt["long_hit"],
757
+ mode="lines+markers",
758
+ line=dict(color="#66bb6a", width=2),
759
+ marker=dict(size=7), name="Long Leg",
760
+ ))
761
+ fig3.add_trace(go.Scatter(
762
+ x=bt["period"], y=bt["short_hit"],
763
+ mode="lines+markers",
764
+ line=dict(color="#ef5350", width=2),
765
+ marker=dict(size=7), name="Short Leg",
766
+ ))
767
+ fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
768
+ fig3.update_layout(**L(
769
+ height=320,
770
+ title="Direction Accuracy — Long &amp; Short Legs",
771
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
772
+ yaxis=dict(title="Hit Rate"),
773
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
774
+ ))
775
+ st.plotly_chart(fig3, use_container_width=True)
776
+
777
+ st.markdown("<div class='section-header'>Quarterly Breakdown</div>",
778
+ unsafe_allow_html=True)
779
+ disp = bt[["period","net_ret","long_ret","short_ret",
780
+ "long_hit","short_hit","n_stocks","q_size"]].copy()
781
+ disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
782
+ "Long Hit","Short Hit","N Stocks","Leg Size"]
783
+
784
def color_ret(val):
    """Return the CSS text colour for one return cell of the styled table.

    Passed to ``Styler.applymap`` for the "Net Ret", "Long Ret" and
    "Short Ret" columns.

    Args:
        val: The cell value.

    Returns:
        ``"color: #66bb6a"`` (green) for a positive number,
        ``"color: #ef5350"`` (red) for a negative number, and ``""`` for
        zero, NaN, booleans and anything that is not a real number.
    """
    import numbers

    # numbers.Real also matches ints and NumPy scalars such as float32,
    # which a plain ``isinstance(val, float)`` check silently leaves
    # unstyled. bool is a Real too, but a flag is not a return value.
    if isinstance(val, bool) or not isinstance(val, numbers.Real):
        return ""
    if val > 0:
        return "color: #66bb6a"
    if val < 0:
        return "color: #ef5350"
    # Zero and NaN (both comparisons are False) fall through unstyled.
    return ""
789
+
790
+ st.dataframe(
791
+ disp.style.applymap(color_ret,
792
+ subset=["Net Ret","Long Ret","Short Ret"])
793
+ .format({c:"{:.4f}" for c in
794
+ ["Net Ret","Long Ret","Short Ret",
795
+ "Long Hit","Short Hit"]}),
796
+ use_container_width=True, hide_index=True,
797
+ )
798
+
799
+ st.markdown("""
800
+ <div class='insight-box'>
801
+ <strong>Context:</strong> A Sharpe of -0.81 with a 5-day holding period
802
+ is consistent with academic literature on post-earnings announcement
803
+ drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
804
+ (IC=0.0198) but is too weak to survive round-trip transaction costs at
805
+ this frequency. Extending to 20-day holding periods is the natural
806
+ next step.
807
+ </div>
808
+ """, unsafe_allow_html=True)
809
+
810
+
811
+ # ═══════════════════════════════════════════════════════════════════════════════
812
+ # PAGE 5 — TRANSCRIPT EXPLORER
813
+ # ═══════════════════════════════════════════════════════════════════════════════
814
+
815
+ elif page == "🔎 Transcript Explorer":
816
+ fm = load_feature_matrix()
817
+
818
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Transcript Explorer</div>",
819
+ unsafe_allow_html=True)
820
+ st.markdown("<div class='hero-sub'>Browse sentiment profiles for any company "
821
+ "and quarter in the dataset.</div>",
822
+ unsafe_allow_html=True)
823
+ st.markdown("<hr>", unsafe_allow_html=True)
824
+
825
+ if fm.empty:
826
+ st.error("Feature matrix not found.")
827
+ st.stop()
828
+
829
+ col1, col2, col3 = st.columns([2,1,1])
830
+ with col1:
831
+ all_tickers = sorted(fm["ticker"].dropna().unique())
832
+ default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
833
+ ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
834
+ with col2:
835
+ years = sorted(fm["year"].unique(), reverse=True)
836
+ year = st.selectbox("Year", years)
837
+ with col3:
838
+ quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
839
+ quarter = st.selectbox("Quarter", quarters)
840
+
841
+ row = fm[(fm["ticker"]==ticker) &
842
+ (fm["year"]==year) &
843
+ (fm["quarter"]==quarter)]
844
+
845
+ if row.empty:
846
+ st.warning("No data for this combination.")
847
+ st.stop()
848
+
849
+ row = row.iloc[0]
850
+
851
+ ret_5d = row.get("ret_5d", 0)
852
+ target = int(row.get("target_5d_up", 0))
853
+ st.markdown(f"""
854
+ <div style='display:flex;align-items:center;gap:16px;margin:16px 0;'>
855
+ <div style='font-size:2rem;font-weight:800;color:#64b5f6;'>{ticker}</div>
856
+ <div style='font-size:1rem;color:#8892b0;'>{int(year)} Q{int(quarter)}</div>
857
+ <div class='badge badge-{"green" if target==1 else "red"}'>
858
+ {"▲ UP" if target==1 else "▼ DOWN"} 5d
859
+ </div>
860
+ <div class='badge badge-blue'>
861
+ 5d Return: {float(ret_5d)*100:.2f}%
862
+ </div>
863
+ </div>
864
+ """, unsafe_allow_html=True)
865
+
866
+ left, right = st.columns([1.2, 1])
867
+
868
+ with left:
869
+ st.markdown("<div class='subsection'>Sentiment Breakdown</div>",
870
+ unsafe_allow_html=True)
871
+ cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
872
+ "QA Positive","QA Neutral","QA Negative"]
873
+ vals = [
874
+ float(row.get("mgmt_mean_pos", 0) or 0),
875
+ float(row.get("mgmt_mean_neu", 0) or 0),
876
+ float(row.get("mgmt_mean_neg", 0) or 0),
877
+ float(row.get("qa_mean_pos", 0) or 0),
878
+ float(row.get("qa_mean_neu", 0) or 0),
879
+ float(row.get("qa_mean_neg", 0) or 0),
880
+ ]
881
+ vals_c = vals + [vals[0]]
882
+ cats_c = cats + [cats[0]]
883
+ fig = go.Figure(go.Scatterpolar(
884
+ r=vals_c, theta=cats_c, fill="toself",
885
+ fillcolor="rgba(100,181,246,0.15)",
886
+ line=dict(color="#64b5f6", width=2), name=ticker,
887
+ ))
888
+ fig.update_layout(
889
+ paper_bgcolor="#0d1117",
890
+ font=dict(color="#b0bec5"),
891
+ polar=dict(
892
+ bgcolor="#0d1117",
893
+ radialaxis=dict(visible=True, range=[0,1],
894
+ gridcolor="#1a2035", linecolor="#1a2035",
895
+ tickfont=dict(size=9, color="#546e7a")),
896
+ angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
897
+ tickfont=dict(size=10, color="#b0bec5")),
898
+ ),
899
+ height=360, showlegend=False,
900
+ title=f"{ticker} — Sentiment Radar",
901
+ margin=dict(l=40,r=40,t=50,b=40),
902
+ )
903
+ st.plotly_chart(fig, use_container_width=True)
904
+
905
+ with right:
906
+ st.markdown("<div class='subsection'>Feature Scores</div>",
907
+ unsafe_allow_html=True)
908
+
909
def score_bar(label, val, invert=False):
    """Render one labelled horizontal score bar through ``st.markdown``.

    Args:
        label: Caption shown on the left, above the bar.
        val: Score to display. ``None`` or NaN renders nothing.
        invert: When true the bar is drawn in red (``#ef5350``) instead
            of blue (``#64b5f6``).

    The caption row shows the raw value with three decimals; the filled
    width is the value clamped to the range 0..1, expressed as a whole
    percentage.
    """
    # Missing scores are skipped entirely rather than drawn as empty bars.
    if val is None or pd.isna(val):
        return

    score = float(val)
    # Clamp to [0, 1] so out-of-range scores cannot overflow the track.
    fill_pct = min(1, max(0, score)) * 100
    if invert:
        bar_color = "#ef5350"
    else:
        bar_color = "#64b5f6"

    markup = f"""
    <div style='margin:8px 0;'>
        <div style='display:flex;justify-content:space-between;
                    font-size:0.8rem;color:#8892b0;margin-bottom:3px;'>
            <span>{label}</span><span>{score:.3f}</span>
        </div>
        <div style='background:#1a2035;border-radius:4px;height:6px;'>
            <div style='background:{bar_color};width:{fill_pct:.0f}%;
                        height:6px;border-radius:4px;'></div>
        </div>
    </div>"""
    st.markdown(markup, unsafe_allow_html=True)
926
+
927
+ score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
928
+ score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
929
+ score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
930
+ score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
931
+ score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
932
+ score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
933
+ score_bar("Forward Looking", row.get("rag_forward_looking_score"))
934
+ score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
935
+ score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
936
+
937
+ # Historical trend
938
+ st.markdown(f"<div class='section-header'>{ticker} — Historical Sentiment</div>",
939
+ unsafe_allow_html=True)
940
+
941
+ td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
942
+ td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
943
+
944
+ if len(td) > 1:
945
+ fig2 = go.Figure()
946
+ for col_name, label, color in [
947
+ ("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
948
+ ("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
949
+ ("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
950
+ ]:
951
+ if col_name in td.columns:
952
+ fig2.add_trace(go.Scatter(
953
+ x=td["period"], y=td[col_name],
954
+ mode="lines+markers", name=label,
955
+ line=dict(color=color, width=2),
956
+ marker=dict(size=6),
957
+ hovertemplate=f"<b>{label}</b><br>%{{x}}<br>%{{y:.3f}}<extra></extra>",
958
+ ))
959
+
960
+ # Mark selected quarter with a zero-width vrect anchored on its period label (cur_idx / cur_pos below are computed but not used by the call)
961
+ cur_period = f"{int(year)}-Q{int(quarter)}"
962
+ if cur_period in td["period"].values:
963
+ cur_idx = td[td["period"]==cur_period].index[0]
964
+ cur_pos = td["period"].tolist().index(cur_period)
965
+ fig2.add_vrect(
966
+ x0=cur_period, x1=cur_period,
967
+ line_dash="dot", line_color="#ffa726", line_width=2,
968
+ )
969
+
970
+ fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
971
+ fig2.update_layout(**L(
972
+ height=320,
973
+ title=f"{ticker} — Sentiment Over Time",
974
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
975
+ yaxis=dict(title="Score"),
976
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
977
+ ))
978
+ st.plotly_chart(fig2, use_container_width=True)
979
+
980
+ # Scatter: sentiment vs return
981
+ if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
982
+ st.markdown("<div class='subsection'>Sentiment vs 5-Day Return</div>",
983
+ unsafe_allow_html=True)
984
+ tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
985
+ tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
986
+ sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
987
+ for r in tc["ret_pct"]]
988
+ fig3 = go.Figure(go.Scatter(
989
+ x=tc["mgmt_net_sentiment"].astype(float),
990
+ y=tc["ret_pct"],
991
+ mode="markers+text",
992
+ text=tc["period"],
993
+ textposition="top center",
994
+ textfont=dict(size=8, color="#546e7a"),
995
+ marker=dict(color=sc_colors, size=9, opacity=0.85),
996
+ hovertemplate=(
997
+ "<b>%{text}</b><br>"
998
+ "Mgmt Sentiment: %{x:.3f}<br>"
999
+ "5d Return: %{y:.2f}%<extra></extra>"
1000
+ ),
1001
+ ))
1002
+ fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
1003
+ fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
1004
+ fig3.update_layout(**L(
1005
+ height=340,
1006
+ title=f"{ticker} — Mgmt Sentiment vs 5-Day Return",
1007
+ xaxis=dict(title="Management Net Sentiment"),
1008
+ yaxis=dict(title="5-Day Return (%)"),
1009
+ ))
1010
+ st.plotly_chart(fig3, use_container_width=True)
1011
+ else:
1012
+ st.info("Not enough historical data for this ticker.")