# web_app/src/streamlit_app.py
# Author (from the hosting page's avatar caption): Akilashamnaka12
# Commit message: Update src/streamlit_app.py
# Commit: c1742e5 (verified)
import streamlit as st
import pandas as pd
import numpy as np
import string, time, re, random
from collections import Counter
# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG
# ─────────────────────────────────────────────────────────────────────────────
# Browser-tab title/icon and overall layout for the app.
# The title's dash and the icon glyph were stored as mojibake ("β€”", "β—‰":
# UTF-8 bytes decoded with a Greek/Windows code page); they are restored here
# to the intended em dash and U+25C9 FISHEYE.
st.set_page_config(
    page_title="NewsLens AI — Daily Mirror Intelligence",
    page_icon="◉",
    layout="wide",
    initial_sidebar_state="collapsed",
)
# ─────────────────────────────────────────────────────────────────────────────
# NLTK
# ─────────────────────────────────────────────────────────────────────────────
import nltk
@st.cache_resource(show_spinner=False)
def _nltk():
    """Download the NLTK data packages this script relies on, without log noise.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the cached
    result instead of re-running the downloads.
    """
    required_packages = ("punkt", "punkt_tab", "stopwords", "wordnet")
    for package in required_packages:
        nltk.download(package, quiet=True)


# Run before the nltk.tokenize / nltk.corpus imports that follow.
_nltk()
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# ─────────────────────────────────────────────────────────────────────────────
# MODELS
# ─────────────────────────────────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_clf():
    """Create the Hugging Face text-classification pipeline for the news model.

    Returns:
        A ``transformers`` pipeline for ``Akilashamnaka12/news_classifier_model``
        configured with ``truncation=True`` and ``max_length=512``.
    """
    # Imported inside the function, as in the original, so transformers is
    # only imported when the classifier is first requested.
    from transformers import pipeline

    classifier = pipeline(
        "text-classification",
        model="Akilashamnaka12/news_classifier_model",
        truncation=True,
        max_length=512,
    )
    return classifier
@st.cache_resource(show_spinner=False)
def load_qa():
    """Create the question-answering pipeline backed by a SQuAD2 RoBERTa checkpoint.

    Returns:
        A ``transformers`` question-answering pipeline whose model and
        tokenizer are both loaded from ``deepset/roberta-base-squad2``.
    """
    from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

    checkpoint = "deepset/roberta-base-squad2"
    # Model first, tokenizer second: same load order as the original call.
    qa_model = AutoModelForQuestionAnswering.from_pretrained(checkpoint)
    qa_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
# ─────────────────────────────────────────────────────────────────────────────
# CONSTANTS
# ─────────────────────────────────────────────────────────────────────────────
# Raw label id -> category name (presumably the ids emitted by the classifier
# pipeline; `resolve` looks labels up here).
LABEL_MAP = {
    "LABEL_0": "Business",
    "LABEL_1": "Opinion",
    "LABEL_2": "Political_gossip",
    "LABEL_3": "Sports",
    "LABEL_4": "World_news",
}

# Display metadata per category: emoji icon, accent colour, tinted background
# colour and a short description.  The icons were stored as mojibake
# (e.g. "πŸ’Ό"); they are restored to the emoji those bytes encode.
CATS = {
    "Business": {
        "icon": "💼", "color": "#0071e3", "bg": "#f0f7ff",
        "desc": "Finance & Economy",
    },
    "Opinion": {
        "icon": "💬", "color": "#34c759", "bg": "#f0fdf4",
        "desc": "Views & Editorials",
    },
    "Political_gossip": {
        "icon": "🏛️", "color": "#ff3b30", "bg": "#fff1f2",
        "desc": "Politics & Governance",
    },
    "Sports": {
        "icon": "⚽", "color": "#ff9f0a", "bg": "#fff7ed",
        "desc": "Matches & Athletics",
    },
    "World_news": {
        "icon": "🌍", "color": "#5e5ce6", "bg": "#f5f3ff",
        "desc": "International Affairs",
    },
}
# English stop-word set and lemmatizer, built once at import and reused by preprocess().
_sw = set(stopwords.words("english"))
_lem = WordNetLemmatizer()


def preprocess(t):
    """Normalise raw text into a space-joined string of lemmatized tokens.

    The text is lower-cased, ASCII punctuation is removed, and the result is
    tokenized; tokens that are purely alphabetic and not English stop words
    are lemmatized and kept.

    Args:
        t: Text to clean.

    Returns:
        The cleaned text, or ``""`` when *t* is not a string or is blank.
    """
    if not (isinstance(t, str) and t.strip()):
        return ""
    without_punct = t.lower().translate(str.maketrans("", "", string.punctuation))
    kept = []
    for token in word_tokenize(without_punct):
        if token.isalpha() and token not in _sw:
            kept.append(_lem.lemmatize(token))
    return " ".join(kept)
def resolve(r):
    """Return the category name for label *r*, or *r* itself if it is not in LABEL_MAP."""
    if r in LABEL_MAP:
        return LABEL_MAP[r]
    return r
# Filler words ignored by word_cloud_html(); built once instead of on every call.
_WORD_CLOUD_STOPS = frozenset({
    "this", "that", "with", "have", "will", "from", "they", "been", "were",
    "their", "there", "also", "which", "when", "into", "than", "then", "about",
    "more", "over", "some", "such", "just", "very", "even", "only", "most", "said",
})


def word_cloud_html(text, n=65):
    """Render the most frequent words of *text* as an inline-styled HTML "cloud".

    Words are runs of four or more ASCII letters, lower-cased; a small set of
    filler words is skipped.  Each of the top *n* words becomes a ``<span>``
    whose font size, opacity and weight scale with its count relative to the
    most frequent word.  Colour and tilt come from ``random.choice``, so they
    differ from call to call.

    Args:
        text: Source text.  Anything that is not a ``str`` (``None``, NaN, ...)
            is treated as empty text instead of raising, matching the guard
            used by ``preprocess``.
        n: Maximum number of distinct words to display.

    Returns:
        An HTML string: a centred ``<div>`` wrapping one span per word, or a
        short placeholder ``<p>`` when no qualifying word is found.
    """
    if not isinstance(text, str):
        text = ""
    words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
    freq = Counter(w for w in words if w not in _WORD_CLOUD_STOPS)
    top = freq.most_common(n)
    if not top:
        return "<p style='color:#86868b;text-align:center'>Not enough text.</p>"

    mx = top[0][1]  # count of the most frequent word; every scale is relative to it
    pal = ["#0071e3", "#34c759", "#ff3b30", "#ff9f0a", "#5e5ce6", "#00c7be", "#ff6b9d"]
    tilts = [-3, -1, 0, 0, 0, 1, 3]  # zero is listed three times so most words stay level

    spans = []
    for word, cnt in top:
        share = cnt / mx
        sz = 0.76 + share * 1.85
        col = random.choice(pal)
        op = 0.45 + share * 0.55
        fw = 300 + int(share * 500)
        rot = random.choice(tilts)
        spans.append(
            f'<span style="font-size:{sz:.2f}rem;color:{col};opacity:{op:.2f};'
            f'font-weight:{fw};display:inline-block;margin:3px 8px;'
            f'transform:rotate({rot}deg);cursor:default;transition:all .2s;"'
            f' onmouseover="this.style.opacity=1;this.style.transform=\'scale(1.22)\'"'
            f' onmouseout="this.style.opacity={op:.2f};this.style.transform=\'rotate({rot}deg)\'">'
            f'{word}</span>'
        )
    out = "".join(spans)
    return f'<div style="text-align:center;line-height:2.6;padding:1.5rem 1rem">{out}</div>'
# ─────────────────────────────────────────────────────────────────────────────
# ═══════════════════════ MASTER CSS ═══════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# Global stylesheet for the whole app, injected as a raw <style> element.
#
# The Google Fonts @import is the FIRST rule in the sheet.  CSS only honours
# @import when it precedes every style rule; in its previous position (under
# "BASE TYPOGRAPHY", after :root and several other rules) it was invalid and
# the Manrope webfont named in the font-family stacks was never loaded.
# No other rule is changed; mojibake dashes in the CSS comments are repaired.
st.markdown("""
<style>
/* Webfont — must stay ahead of every other rule or the browser ignores it */
@import url('https://fonts.googleapis.com/css2?family=Manrope:wght@200;300;400;500;600;700;800&display=swap');
/* ══════════════════════════════════════════════
TOKENS — Apple design system
══════════════════════════════════════════════ */
:root{
/* Backgrounds */
--bg-primary: #ffffff;
--bg-secondary: #f5f5f7;
--bg-tertiary: #fbfbfd;
--bg-dark: #1d1d1f;
--bg-darker: #000000;
/* Text */
--text-primary: #1d1d1f;
--text-secondary: #6e6e73;
--text-tertiary: #86868b;
--text-on-dark: #f5f5f7;
--text-on-dark-2: rgba(245,245,247,0.6);
/* Accent */
--blue: #0071e3;
--blue-hv: #0077ed;
--green: #34c759;
--red: #ff3b30;
--orange: #ff9f0a;
--violet: #5e5ce6;
/* Structure */
--border: rgba(0,0,0,0.08);
--border-mid: rgba(0,0,0,0.12);
--border-dark: rgba(255,255,255,0.10);
--radius-sm: 10px;
--radius-md: 18px;
--radius-lg: 24px;
--radius-pill: 999px;
/* Shadows */
--shadow-xs: 0 1px 3px rgba(0,0,0,0.05),0 2px 8px rgba(0,0,0,0.04);
--shadow-sm: 0 2px 6px rgba(0,0,0,0.06),0 6px 20px rgba(0,0,0,0.05);
--shadow-md: 0 4px 16px rgba(0,0,0,0.08),0 16px 48px rgba(0,0,0,0.06);
--shadow-lg: 0 8px 32px rgba(0,0,0,0.12),0 32px 64px rgba(0,0,0,0.08);
/* Spacing */
--page-max: 1040px;
--page-pad: 2.5rem;
--section-v: 5rem;
}
/* ══════════════════════════════════════════════
KILL STREAMLIT CHROME
══════════════════════════════════════════════ */
#MainMenu,footer,header,.stDeployButton,
[data-testid="stToolbar"],
section[data-testid="stSidebar"]{display:none!important}
.block-container{padding:0!important;max-width:100%!important}
.stApp{background:var(--bg-primary)!important}
/* ══════════════════════════════════════════════
BASE TYPOGRAPHY
══════════════════════════════════════════════ */
html,body,[class*="css"]{
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif;
color:var(--text-primary);
background:var(--bg-primary);
-webkit-font-smoothing:antialiased;
-moz-osx-font-smoothing:grayscale;
}
/* ══════════════════════════════════════════════
FROSTED GLASS NAVIGATION BAR
══════════════════════════════════════════════ */
#nav{
position:sticky;top:0;z-index:1000;
height:52px;
background:rgba(255,255,255,0.80);
backdrop-filter:blur(20px) saturate(180%);
-webkit-backdrop-filter:blur(20px) saturate(180%);
border-bottom:1px solid var(--border);
display:flex;align-items:center;
padding:0 var(--page-pad);
}
.nav-inner{
max-width:var(--page-max);margin:0 auto;width:100%;
display:flex;align-items:center;gap:0;
}
.nav-logo{
font-size:1.05rem;font-weight:700;
letter-spacing:-.02em;color:var(--text-primary);
white-space:nowrap;margin-right:auto;
}
.nav-logo span{color:var(--blue);}
.nav-items{
display:flex;align-items:center;gap:0;
position:absolute;left:50%;transform:translateX(-50%);
}
.nav-item{
font-size:.82rem;font-weight:400;
color:var(--text-secondary);
padding:0 1.1rem;cursor:pointer;
transition:color .15s;
border:none;background:none;
letter-spacing:-.01em;
}
.nav-item:hover{color:var(--text-primary);}
.nav-item.on{color:var(--text-primary);font-weight:500;}
.nav-badge{
font-size:.68rem;font-weight:500;
background:var(--blue);color:#fff;
border-radius:var(--radius-pill);
padding:2px 8px;margin-left:auto;
letter-spacing:.02em;
}
/* ══════════════════════════════════════════════
HERO — Cinematic full-bleed
══════════════════════════════════════════════ */
#hero{
position:relative;
min-height:88vh;
display:flex;align-items:flex-end;
overflow:hidden;
background:var(--bg-darker);
}
.hero-bg{
position:absolute;inset:0;
background-image:url('https://images.unsplash.com/photo-1504711434969-e33886168f5c?w=1920&q=85');
background-size:cover;background-position:center 30%;
opacity:.55;
transition:opacity .5s;
}
.hero-overlay{
position:absolute;inset:0;
background:linear-gradient(
to bottom,
rgba(0,0,0,0) 0%,
rgba(0,0,0,0) 30%,
rgba(0,0,0,.55) 75%,
rgba(0,0,0,.90) 100%
);
}
.hero-mesh{
position:absolute;inset:0;
background:
radial-gradient(ellipse at 20% 60%,rgba(0,113,227,.20) 0%,transparent 55%),
radial-gradient(ellipse at 80% 30%,rgba(94,92,230,.15) 0%,transparent 50%);
}
.hero-content{
position:relative;z-index:2;
width:100%;
max-width:var(--page-max);
margin:0 auto;
padding:0 var(--page-pad) 5rem;
}
.hero-kicker{
display:inline-flex;align-items:center;gap:8px;
border:1px solid rgba(255,255,255,.20);
background:rgba(255,255,255,.08);
backdrop-filter:blur(12px);
border-radius:var(--radius-pill);
padding:5px 16px;margin-bottom:1.6rem;
font-size:.71rem;font-weight:500;
color:rgba(255,255,255,.75);
letter-spacing:.08em;text-transform:uppercase;
}
.kicker-dot{
width:6px;height:6px;border-radius:50%;
background:#34c759;
box-shadow:0 0 0 0 rgba(52,199,89,.5);
animation:ping 2s ease-in-out infinite;
}
@keyframes ping{
0%,100%{box-shadow:0 0 0 0 rgba(52,199,89,.4);}
50% {box-shadow:0 0 0 8px rgba(52,199,89,0);}
}
h1.h-display{
font-size:clamp(3.2rem,6.5vw,6rem);
font-weight:700;letter-spacing:-.04em;
line-height:1.0;color:#ffffff;margin:0 0 1.1rem;
}
h1.h-display em{
font-style:normal;
background:linear-gradient(90deg,#60b0ff 0%,#a78bfa 50%,#34c759 100%);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;
background-clip:text;
}
.h-sub{
font-size:1.15rem;font-weight:300;
color:rgba(255,255,255,.65);
line-height:1.6;letter-spacing:-.01em;
max-width:540px;margin-bottom:2.2rem;
}
.h-actions{display:flex;gap:14px;flex-wrap:wrap;}
.btn-primary{
background:var(--blue);color:#fff;
padding:11px 26px;border-radius:var(--radius-pill);
font-size:.88rem;font-weight:600;
letter-spacing:-.01em;border:none;cursor:pointer;
transition:all .2s;text-decoration:none;display:inline-block;
}
.btn-primary:hover{background:var(--blue-hv);transform:translateY(-1px);
box-shadow:0 6px 20px rgba(0,113,227,.35);}
.btn-ghost{
background:rgba(255,255,255,.12);
border:1px solid rgba(255,255,255,.25);
color:#fff;
padding:10px 24px;border-radius:var(--radius-pill);
font-size:.88rem;font-weight:500;
backdrop-filter:blur(8px);cursor:pointer;
transition:all .2s;text-decoration:none;display:inline-block;
}
.btn-ghost:hover{background:rgba(255,255,255,.18);transform:translateY(-1px);}
/* ══════════════════════════════════════════════
FEATURE BAR — Apple product row
══════════════════════════════════════════════ */
#feat-bar{
display:grid;grid-template-columns:repeat(4,1fr);
background:var(--bg-primary);
border-bottom:1px solid var(--border);
}
.fb-cell{
padding:1.8rem 2rem;
border-right:1px solid var(--border);
display:flex;align-items:center;gap:14px;
transition:background .2s;
}
.fb-cell:last-child{border-right:none;}
.fb-cell:hover{background:var(--bg-secondary);}
.fb-icon{
width:44px;height:44px;border-radius:var(--radius-sm);
display:flex;align-items:center;justify-content:center;
font-size:1.3rem;flex-shrink:0;
}
.fb-title{font-size:.88rem;font-weight:600;color:var(--text-primary);letter-spacing:-.01em;}
.fb-sub{font-size:.75rem;color:var(--text-secondary);margin-top:2px;line-height:1.4;}
/* ══════════════════════════════════════════════
PAGE SECTIONS
══════════════════════════════════════════════ */
.section{padding:var(--section-v) var(--page-pad);background:var(--bg-primary);}
.section-alt{padding:var(--section-v) var(--page-pad);background:var(--bg-secondary);}
.section-dark{padding:var(--section-v) var(--page-pad);background:var(--bg-dark);}
.section-inner{max-width:var(--page-max);margin:0 auto;}
/* ══════════════════════════════════════════════
SECTION HEADERS
══════════════════════════════════════════════ */
.s-label{
font-size:.71rem;font-weight:600;
letter-spacing:.1em;text-transform:uppercase;
color:var(--blue);margin-bottom:.6rem;display:block;
}
.s-label-green{color:var(--green)!important;}
.s-label-violet{color:var(--violet)!important;}
.s-label-light{color:rgba(0,113,227,.75)!important;}
h2.s-h{
font-size:clamp(2rem,4vw,3rem);font-weight:700;
letter-spacing:-.035em;line-height:1.08;
color:var(--text-primary);margin:0 0 .7rem;
}
h2.s-h-light{color:var(--text-on-dark)!important;}
.s-p{
font-size:1.05rem;font-weight:300;
color:var(--text-secondary);line-height:1.65;
letter-spacing:-.01em;max-width:500px;margin-bottom:3rem;
}
.s-p-light{color:var(--text-on-dark-2)!important;}
/* ══════════════════════════════════════════════
CARDS
══════════════════════════════════════════════ */
.card{
background:var(--bg-primary);
border:1px solid var(--border);
border-radius:var(--radius-md);
overflow:hidden;
box-shadow:var(--shadow-xs);
transition:box-shadow .3s,transform .25s;
}
.card:hover{box-shadow:var(--shadow-md);transform:translateY(-3px);}
.card-alt{background:var(--bg-secondary);}
.card-dark{background:#2c2c2e;border-color:rgba(255,255,255,.08);}
.card-body{padding:1.8rem 2rem;}
.card-label{
font-size:.7rem;font-weight:600;
letter-spacing:.1em;text-transform:uppercase;
color:var(--blue);margin-bottom:.5rem;display:block;
}
.card-title{
font-size:1.15rem;font-weight:600;
letter-spacing:-.02em;color:var(--text-primary);
margin-bottom:.35rem;line-height:1.25;
}
.card-title-light{color:var(--text-on-dark)!important;}
.card-desc{font-size:.83rem;color:var(--text-secondary);line-height:1.6;font-weight:400;}
.card-desc-light{color:var(--text-on-dark-2)!important;}
/* ══════════════════════════════════════════════
IMAGE HERO CARDS (section banners)
══════════════════════════════════════════════ */
.img-card{
position:relative;border-radius:var(--radius-lg);
overflow:hidden;min-height:200px;
display:flex;align-items:flex-end;margin-bottom:2.5rem;
box-shadow:var(--shadow-md);
}
.img-card-bg{
position:absolute;inset:0;
background-size:cover;background-position:center;
filter:brightness(.38) saturate(.7);
}
.img-card-overlay{
position:absolute;inset:0;
background:linear-gradient(105deg,
rgba(0,0,0,.88) 0%,
rgba(0,0,0,.40) 60%,
transparent 100%);
}
.img-card-body{
position:relative;z-index:2;
padding:2.2rem 2.6rem;width:100%;
}
.ic-tag{
font-size:.68rem;font-weight:600;
letter-spacing:.12em;text-transform:uppercase;
color:rgba(255,255,255,.45);margin-bottom:.5rem;display:block;
}
.ic-title{
font-size:2rem;font-weight:700;
letter-spacing:-.03em;color:#fff;line-height:1.1;
}
.ic-sub{
font-size:.85rem;color:rgba(255,255,255,.5);
margin-top:.4rem;font-weight:300;
}
/* ══════════════════════════════════════════════
STAT TILES
══════════════════════════════════════════════ */
.stat-grid{
display:grid;grid-template-columns:repeat(5,1fr);
gap:10px;margin:1.5rem 0;
}
.stat-tile{
background:var(--bg-secondary);
border:1px solid var(--border);
border-radius:var(--radius-md);
padding:1.2rem 1rem;text-align:center;
transition:all .2s;cursor:default;
}
.stat-tile:hover{
background:var(--bg-primary);
box-shadow:var(--shadow-sm);
transform:translateY(-2px);
}
.st-icon{font-size:1.5rem;margin-bottom:.45rem;display:block;}
.st-num{
font-size:1.9rem;font-weight:700;
letter-spacing:-.04em;line-height:1;
}
.st-lbl{
font-size:.66rem;font-weight:500;
color:var(--text-tertiary);margin-top:4px;
letter-spacing:.01em;
}
/* ══════════════════════════════════════════════
CATEGORY LEGEND
══════════════════════════════════════════════ */
.cat-item{
display:flex;align-items:center;gap:14px;
padding:12px 0;border-bottom:1px solid var(--border);
transition:background .15s;
}
.cat-pip{
width:8px;height:8px;border-radius:50%;flex-shrink:0;
}
.cat-icon-box{
width:34px;height:34px;border-radius:10px;
display:flex;align-items:center;justify-content:center;
font-size:1rem;flex-shrink:0;
border:1px solid var(--border);
background:var(--bg-secondary);
}
.cat-name{font-size:.88rem;font-weight:600;color:var(--text-primary);letter-spacing:-.01em;}
.cat-desc{font-size:.74rem;color:var(--text-secondary);margin-top:1px;font-weight:400;}
/* ══════════════════════════════════════════════
ANSWER DISPLAY
══════════════════════════════════════════════ */
.answer-wrap{
background:#f0f7ff;
border:1px solid rgba(0,113,227,.15);
border-left:3px solid var(--blue);
border-radius:0 var(--radius-md) var(--radius-md) 0;
padding:1.8rem 2rem;margin-top:1.4rem;
}
.answer-chip{
display:inline-block;
background:var(--blue);color:#fff;
font-size:.66rem;font-weight:600;
letter-spacing:.1em;text-transform:uppercase;
padding:3px 10px;border-radius:var(--radius-pill);
margin-bottom:.75rem;
}
.answer-text{
font-size:1.4rem;font-weight:600;
letter-spacing:-.025em;color:var(--text-primary);
line-height:1.4;
}
.answer-meta{
font-size:.78rem;color:var(--text-secondary);
margin-top:.8rem;font-weight:400;
}
.answer-meta strong{color:var(--blue);font-weight:600;}
/* ══════════════════════════════════════════════
SUMMARY
══════════════════════════════════════════════ */
.summary-wrap{
background:var(--bg-secondary);
border:1px solid var(--border);
border-radius:var(--radius-md);
padding:1.6rem 2rem;margin-top:1.2rem;
}
.summary-chip{
display:inline-block;
background:var(--text-primary);color:#fff;
font-size:.66rem;font-weight:600;
letter-spacing:.1em;text-transform:uppercase;
padding:3px 10px;border-radius:var(--radius-pill);margin-bottom:.7rem;
}
.summary-text{
font-size:.95rem;font-weight:400;
color:var(--text-primary);line-height:1.8;letter-spacing:-.01em;
}
/* ══════════════════════════════════════════════
CONF BARS
══════════════════════════════════════════════ */
.conf-row{display:flex;align-items:center;gap:12px;margin-bottom:9px;}
.conf-lbl{
width:118px;font-size:.78rem;font-weight:500;
color:var(--text-secondary);flex-shrink:0;letter-spacing:-.01em;
}
.conf-bg{
flex:1;height:4px;
background:rgba(0,0,0,.07);
border-radius:999px;overflow:hidden;
}
.conf-fg{height:100%;border-radius:999px;}
.conf-pct{
width:38px;text-align:right;
font-size:.75rem;font-weight:600;color:var(--text-primary);
}
/* ══════════════════════════════════════════════
TIPS
══════════════════════════════════════════════ */
.tip-row{
display:flex;gap:14px;padding:12px 0;
border-bottom:1px solid var(--border);
align-items:flex-start;
}
.tip-num{
font-size:.7rem;font-weight:600;
color:var(--blue);width:18px;flex-shrink:0;padding-top:2px;
}
.tip-title{
font-size:.87rem;font-weight:600;
color:var(--text-primary);letter-spacing:-.01em;
}
.tip-body{font-size:.77rem;color:var(--text-secondary);margin-top:2px;line-height:1.5;}
/* ══════════════════════════════════════════════
METRIC CARDS (Insights)
══════════════════════════════════════════════ */
.metric-card{
background:var(--bg-primary);
border:1px solid var(--border);
border-radius:var(--radius-md);
padding:1.8rem 1.6rem;
text-align:center;
box-shadow:var(--shadow-xs);
transition:all .2s;
}
.metric-card:hover{box-shadow:var(--shadow-md);transform:translateY(-2px);}
.metric-val{
font-size:2.6rem;font-weight:700;
letter-spacing:-.05em;line-height:1;margin-bottom:.5rem;
}
.metric-lbl{
font-size:.76rem;font-weight:500;
color:var(--text-secondary);letter-spacing:.01em;
text-transform:uppercase;
}
/* ══════════════════════════════════════════════
SPOTLIGHT
══════════════════════════════════════════════ */
.spotlight{
background:var(--bg-primary);
border:1px solid var(--border);
border-radius:var(--radius-lg);
padding:2rem 2.2rem;
box-shadow:var(--shadow-sm);
}
.spot-badge{
display:inline-flex;align-items:center;gap:6px;
padding:4px 12px;border-radius:var(--radius-pill);
font-size:.76rem;font-weight:600;
border:1px solid;margin-right:8px;margin-bottom:1rem;
}
.spot-text{
font-size:.93rem;color:var(--text-secondary);
line-height:1.8;font-weight:400;letter-spacing:-.01em;
}
/* ══════════════════════════════════════════════
WORD CLOUD
══════════════════════════════════════════════ */
.wc-wrap{
background:var(--bg-primary);
border:1px solid var(--border);
border-radius:var(--radius-md);
min-height:260px;padding:1.5rem;
box-shadow:var(--shadow-xs);
}
/* ══════════════════════════════════════════════
EMPTY STATE
══════════════════════════════════════════════ */
.empty-state{
text-align:center;padding:5rem 2rem;
border:1px dashed var(--border);
border-radius:var(--radius-lg);
background:var(--bg-secondary);
}
.empty-icon{font-size:2.8rem;opacity:.3;display:block;margin-bottom:1rem;}
.empty-title{
font-size:1.05rem;font-weight:600;
color:var(--text-primary);letter-spacing:-.02em;margin-bottom:.35rem;
}
.empty-sub{font-size:.83rem;color:var(--text-secondary);}
/* ══════════════════════════════════════════════
DIVIDERS
══════════════════════════════════════════════ */
.div-line{border:none;height:1px;margin:0;background:var(--border);}
.div-gap {border:none;height:1px;margin:2rem 0;background:var(--border);}
/* ══════════════════════════════════════════════
STREAMLIT WIDGET RESETS — Apple-quality
══════════════════════════════════════════════ */
/* Primary button */
.stButton>button{
background:var(--blue)!important;color:#fff!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
font-weight:600!important;font-size:.85rem!important;
letter-spacing:-.01em!important;border:none!important;
border-radius:var(--radius-pill)!important;
padding:.62rem 1.6rem!important;
box-shadow:0 2px 8px rgba(0,113,227,.2)!important;
transition:all .2s!important;
}
.stButton>button:hover{
background:var(--blue-hv)!important;
transform:translateY(-1px)!important;
box-shadow:0 6px 18px rgba(0,113,227,.3)!important;
}
.stButton>button:active{transform:scale(.98)!important;}
/* Download button */
.stDownloadButton>button{
background:transparent!important;
color:var(--blue)!important;
border:1.5px solid rgba(0,113,227,.35)!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
font-weight:500!important;font-size:.82rem!important;
border-radius:var(--radius-pill)!important;
padding:.55rem 1.4rem!important;
transition:all .2s!important;
}
.stDownloadButton>button:hover{
background:#f0f7ff!important;border-color:var(--blue)!important;
}
/* File uploader */
div[data-testid="stFileUploader"]{
background:var(--bg-secondary)!important;
border:1.5px dashed var(--border-mid)!important;
border-radius:var(--radius-md)!important;
transition:all .2s!important;
}
div[data-testid="stFileUploader"]:hover{
border-color:var(--blue)!important;
background:#f0f7ff!important;
}
div[data-testid="stFileUploader"] *{color:var(--text-secondary)!important;}
/* Text inputs */
.stTextArea textarea,.stTextInput input{
background:var(--bg-primary)!important;
border:1px solid var(--border-mid)!important;
border-radius:var(--radius-sm)!important;
color:var(--text-primary)!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
font-size:.9rem!important;font-weight:400!important;
transition:all .2s!important;
box-shadow:inset 0 1px 3px rgba(0,0,0,.04)!important;
}
.stTextArea textarea:focus,.stTextInput input:focus{
border-color:var(--blue)!important;
box-shadow:0 0 0 3px rgba(0,113,227,.1),inset 0 1px 3px rgba(0,0,0,.04)!important;
outline:none!important;
}
.stTextArea label,.stTextInput label{
font-size:.75rem!important;font-weight:600!important;
color:var(--text-primary)!important;letter-spacing:.02em!important;
text-transform:uppercase!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
}
/* Select */
.stSelectbox label{
font-size:.75rem!important;font-weight:600!important;
color:var(--text-primary)!important;letter-spacing:.02em!important;
text-transform:uppercase!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
}
.stSelectbox [data-baseweb="select"]>div{
background:var(--bg-primary)!important;
border:1px solid var(--border-mid)!important;
border-radius:var(--radius-sm)!important;
color:var(--text-primary)!important;font-weight:400!important;
}
.stSelectbox [data-baseweb="select"]>div:focus-within{
border-color:var(--blue)!important;
box-shadow:0 0 0 3px rgba(0,113,227,.1)!important;
}
/* Slider */
.stSlider label{
font-size:.75rem!important;font-weight:600!important;
color:var(--text-primary)!important;letter-spacing:.02em!important;
text-transform:uppercase!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
}
div[data-baseweb="slider"]>div>div>div{background:var(--blue)!important;}
/* Radio */
.stRadio>label{
font-size:.75rem!important;font-weight:600!important;
color:var(--text-primary)!important;letter-spacing:.02em!important;
text-transform:uppercase!important;
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
}
.stRadio [data-testid="stMarkdownContainer"] p{
font-size:.85rem!important;color:var(--text-primary)!important;
}
/* Progress */
.stProgress>div>div{background:var(--blue)!important;border-radius:999px!important;}
/* Tabs — pill style */
.stTabs [data-baseweb="tab-list"]{
background:var(--bg-secondary)!important;
border:1px solid var(--border)!important;
border-radius:var(--radius-pill)!important;
padding:4px!important;gap:2px!important;
margin-bottom:2rem!important;
box-shadow:var(--shadow-xs)!important;
display:inline-flex!important;width:auto!important;
}
.stTabs [data-baseweb="tab"]{
font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','Manrope',sans-serif!important;
font-size:.8rem!important;font-weight:500!important;
color:var(--text-secondary)!important;
padding:.45rem 1.2rem!important;
border-radius:var(--radius-pill)!important;
border:none!important;background:transparent!important;
transition:all .15s!important;letter-spacing:-.01em!important;
}
.stTabs [data-baseweb="tab"]:hover{color:var(--text-primary)!important;}
.stTabs [aria-selected="true"]{
background:var(--bg-primary)!important;
color:var(--text-primary)!important;
box-shadow:var(--shadow-xs)!important;
font-weight:600!important;
}
.stTabs [data-baseweb="tab-border"]{display:none!important;}
.stTabs [data-baseweb="tab-panel"]{padding-top:0!important;}
/* Dataframe */
.stDataFrame{border-radius:var(--radius-sm)!important;overflow:hidden!important;
border:1px solid var(--border)!important;box-shadow:var(--shadow-xs)!important;}
/* Alerts */
.stAlert{border-radius:var(--radius-sm)!important;}
/* Expander */
details{background:var(--bg-primary)!important;border:1px solid var(--border)!important;
border-radius:var(--radius-sm)!important;}
details summary{font-size:.83rem!important;color:var(--text-secondary)!important;}
/* Scrollbar */
::-webkit-scrollbar{width:5px;height:5px;}
::-webkit-scrollbar-track{background:var(--bg-secondary);}
::-webkit-scrollbar-thumb{background:rgba(0,0,0,.1);border-radius:999px;}
::-webkit-scrollbar-thumb:hover{background:rgba(0,0,0,.2);}
/* ══════════════════════════════════════════════
ANIMATIONS
══════════════════════════════════════════════ */
@keyframes riseIn{
from{opacity:0;transform:translateY(20px);}
to{opacity:1;transform:translateY(0);}
}
.rise{animation:riseIn .5s cubic-bezier(.22,1,.36,1) both;}
.r1{animation-delay:.05s}.r2{animation-delay:.12s}
.r3{animation-delay:.19s}.r4{animation-delay:.26s}
/* ══════════════════════════════════════════════
FOOTER
══════════════════════════════════════════════ */
#footer{
background:var(--bg-secondary);
border-top:1px solid var(--border);
padding:3rem var(--page-pad) 2.5rem;
}
.footer-inner{max-width:var(--page-max);margin:0 auto;}
.footer-top{
display:flex;justify-content:space-between;
align-items:flex-start;flex-wrap:wrap;gap:2rem;
padding-bottom:2rem;
border-bottom:1px solid var(--border);margin-bottom:1.5rem;
}
.footer-brand{
font-size:.95rem;font-weight:700;
color:var(--text-primary);letter-spacing:-.02em;margin-bottom:.3rem;
}
.footer-brand span{color:var(--blue);}
.footer-tagline{font-size:.78rem;color:var(--text-secondary);}
.footer-links{display:flex;gap:2rem;flex-wrap:wrap;}
.footer-link{font-size:.78rem;color:var(--text-secondary);transition:color .15s;}
.footer-link:hover{color:var(--text-primary);}
.footer-copy{font-size:.72rem;color:var(--text-tertiary);}
</style>
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# STATE
# ─────────────────────────────────────────────────────────────────────────────
# First run of a session: start on the Classify page. An existing value is
# left untouched so the selected page survives reruns.
st.session_state.setdefault("page", "classify")
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ NAVIGATION ═══════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# Active page for this run; it drives the nav highlight below and the page
# dispatch further down the script.
pg = st.session_state["page"]

# Decorative top bar. The anchors carry no href, so they only indicate which
# page is active; the clickable navigation is the button row that follows.
st.markdown(f"""
<div id="nav">
<div class="nav-inner">
<div class="nav-logo">◉ News<span>Lens AI</span></div>
<div class="nav-items">
<a class="nav-item {'on' if pg=='classify' else ''}">Classify</a>
<a class="nav-item {'on' if pg=='qa' else ''}">Q &amp; A</a>
<a class="nav-item {'on' if pg=='insights' else ''}">Insights</a>
</div>
<div class="nav-badge">DA3111</div>
</div>
</div>
""", unsafe_allow_html=True)

# Nav button row (functional, visually hidden by CSS).
# One (caption, widget key, target page) entry per button; the fourth, wide
# column is a spacer and receives no widget.
nav_cols = st.columns([1, 1, 1, 6])
nav_buttons = [
    ("Classify", "nb1", "classify"),
    ("Q & A", "nb2", "qa"),
    ("Insights", "nb3", "insights"),
]
for nav_col, (caption, widget_key, target_page) in zip(nav_cols, nav_buttons):
    with nav_col:
        if st.button(caption, key=widget_key, use_container_width=True):
            # Store the choice, then rerun so `pg` above picks it up.
            st.session_state["page"] = target_page
            st.rerun()
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ HERO ═════════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# Hero banner: static markup rendered on every page. The two action anchors
# carry no href, so they are presentational only.
st.markdown("""
<div id="hero">
<div class="hero-bg"></div>
<div class="hero-overlay"></div>
<div class="hero-mesh"></div>
<div style="width:100%">
<div class="hero-content">
<div class="hero-kicker">
<span class="kicker-dot"></span>
Daily Mirror · AI Intelligence · Assignment 01
</div>
<h1 class="h-display">
News that<br><em>understands itself.</em>
</h1>
<p class="h-sub">
Classify articles, extract answers, and surface visual
insights from Daily Mirror news — powered by fine-tuned
Hugging Face Transformers.
</p>
<div class="h-actions">
<a class="btn-primary">Get Started →</a>
<a class="btn-ghost">Learn More</a>
</div>
</div>
</div>
</div>
""", unsafe_allow_html=True)
# Feature bar
# Feature bar: four static cells (icon, title, subtitle) followed by a divider.
st.markdown("""
<div id="feat-bar">
<div class="fb-cell">
<div class="fb-icon" style="background:#eff6ff;">🧠</div>
<div>
<div class="fb-title">DistilBERT Classifier</div>
<div class="fb-sub">Fine-tuned on 5 news categories</div>
</div>
</div>
<div class="fb-cell">
<div class="fb-icon" style="background:#f0fdf4;">💬</div>
<div>
<div class="fb-title">RoBERTa Q&A</div>
<div class="fb-sub">Extractive answers with highlights</div>
</div>
</div>
<div class="fb-cell">
<div class="fb-icon" style="background:#faf5ff;">📊</div>
<div>
<div class="fb-title">Visual Insights</div>
<div class="fb-sub">Charts, word clouds, distributions</div>
</div>
</div>
<div class="fb-cell">
<div class="fb-icon" style="background:#fff7ed;">⚙️</div>
<div>
<div class="fb-title">NLP Preprocessing</div>
<div class="fb-sub">7-step NLTK pipeline built in</div>
</div>
</div>
</div>
<hr class="div-line">
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: CLASSIFY ═══════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
if pg == "classify":
    # Classify page: upload a CSV with a `content` column, run preprocessing
    # and batched classification, then show per-category counts, tabbed
    # result tables and a CSV download. Results are kept in
    # st.session_state["out_df"] so the other pages can read them.

    # ── Section header ──────────────────────────────────────────────────────
    st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">Component 01 · Text Classification</span>
<h2 class="s-h rise r2">Every article,<br>perfectly categorised.</h2>
<p class="s-p rise r3">Upload your CSV and a fine-tuned DistilBERT model
instantly sorts each article into one of five categories —
Business, Opinion, Political Gossip, Sports, or World News.</p>
</div>
</div>
""", unsafe_allow_html=True)
    st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
    # Image banner
    st.markdown("""
<div class="img-card rise r1">
<div class="img-card-bg"
style="background-image:url('https://images.unsplash.com/photo-1495020689067-958852a7765e?w=1400&q=80');">
</div>
<div class="img-card-overlay"></div>
<div class="img-card-body">
<span class="ic-tag">Upload · Preprocess · Classify · Download</span>
<div class="ic-title">News Classification at Scale</div>
<div class="ic-sub">7-step preprocessing pipeline · Batch inference · CSV output</div>
</div>
</div>
""", unsafe_allow_html=True)
    col_L, col_R = st.columns([3, 2], gap="large")

    # ── LEFT COLUMN ─────────────────────────────────────────────────────────
    with col_L:
        # Upload card
        st.markdown('<div class="card rise r2"><div class="card-body">', unsafe_allow_html=True)
        st.markdown('<span class="card-label">Step 01 — Upload</span>', unsafe_allow_html=True)
        st.markdown('<div class="card-title">Select your CSV file</div>', unsafe_allow_html=True)
        st.markdown(
            '<div class="card-desc" style="margin-bottom:1.3rem">Requires a '
            '<code style="background:#f1f5f9;border:1px solid #e2e8f0;border-radius:5px;'
            'padding:1px 7px;font-size:.82rem;color:#0071e3;">content</code>'
            ' column. Compatible with the evaluation.csv provided with this assignment.</div>',
            unsafe_allow_html=True)
        # A non-empty label is kept for accessibility; it stays hidden through
        # label_visibility="collapsed".
        uploaded = st.file_uploader("Upload CSV", type=["csv"], key="cls_upload",
                                    label_visibility="collapsed")

        # True when the current upload cannot be used (unreadable file or no
        # `content` column). The page keeps rendering instead of stopping, so
        # the closing markup, the reference column and the footer still appear.
        upload_invalid = False
        if uploaded:
            try:
                df = pd.read_csv(uploaded)
            except (pd.errors.EmptyDataError, pd.errors.ParserError,
                    UnicodeDecodeError) as exc:
                df = None
                upload_invalid = True
                st.error(f"Could not read the uploaded CSV: {exc}")

            if df is not None:
                st.success(f"✓ &nbsp; {len(df):,} records loaded &nbsp;·&nbsp; {len(df.columns)} columns")
                if "content" not in df.columns:
                    upload_invalid = True
                    # map(str, ...) so non-string headers cannot break the join.
                    st.error(f"Column `content` not found. "
                             f"Found: **{', '.join(map(str, df.columns))}**")
                else:
                    with st.expander("Preview — first 5 rows"):
                        st.dataframe(df.head(), use_container_width=True)
                    st.markdown("<br>", unsafe_allow_html=True)
                    if st.button("Run Classification Pipeline", key="run_cls"):
                        with st.status("⚙️ Preprocessing text (7 steps)…",
                                       expanded=False) as s:
                            cleaned = df["content"].fillna("").apply(preprocess).tolist()
                            s.update(label="✅ Preprocessing complete", state="complete")
                        with st.spinner("Loading model — first run takes ~30s…"):
                            clf = load_clf()
                        prog = st.progress(0, text="Classifying articles…")
                        preds, confs = [], []
                        batch_size = 16
                        for i in range(0, len(cleaned), batch_size):
                            # Blank texts are replaced by a single space before
                            # being passed to the pipeline.
                            batch = [t if t.strip() else " "
                                     for t in cleaned[i:i + batch_size]]
                            results = clf(batch, truncation=True, max_length=512)
                            for r in results:
                                preds.append(resolve(r["label"]))
                                confs.append(round(r["score"], 4))
                            # The last batch may overshoot; clamp to 100.
                            pct = min(int((i + batch_size) / len(cleaned) * 100), 100)
                            prog.progress(pct, text=f"Classifying… {pct}%")
                            time.sleep(0.01)
                        prog.empty()
                        out = df.copy()
                        out["class"] = preds
                        out["confidence"] = confs
                        st.session_state["out_df"] = out
                        st.success("✅ Classification complete — results ready below.")
        st.markdown("</div></div>", unsafe_allow_html=True)

        # Results: shown whenever classified data exists and the current
        # upload is usable.
        if not upload_invalid and "out_df" in st.session_state:
            out = st.session_state["out_df"]
            counts = out["class"].value_counts()
            # Stat tiles
            st.markdown('<div class="stat-grid rise r3">', unsafe_allow_html=True)
            for label, meta in CATS.items():
                n = counts.get(label, 0)
                st.markdown(f"""
<div class="stat-tile"
style="border-top:2px solid {meta['color']};">
<span class="st-icon">{meta['icon']}</span>
<div class="st-num" style="color:{meta['color']}">{n}</div>
<div class="st-lbl">{label.replace('_',' ')}</div>
</div>""", unsafe_allow_html=True)
            st.markdown("</div>", unsafe_allow_html=True)
            # Tabbed results
            st.markdown('<div class="card rise r4" style="margin-top:1rem">', unsafe_allow_html=True)
            st.markdown('<div class="card-body">', unsafe_allow_html=True)
            st.markdown('<span class="card-label">Results</span>', unsafe_allow_html=True)
            st.markdown('<div class="card-title" style="margin-bottom:1.2rem">'
                        'Classified Articles</div>', unsafe_allow_html=True)
            all_t, *cat_ts = st.tabs(
                ["All Articles"] +
                [f"{CATS[l]['icon']} {l.replace('_',' ')}" for l in CATS]
            )
            with all_t:
                st.dataframe(out[["content", "class", "confidence"]],
                             use_container_width=True, height=320)
            for cat_tab, label in zip(cat_ts, CATS):
                with cat_tab:
                    sub = out[out["class"] == label][["content", "confidence"]]
                    if sub.empty:
                        st.info(f"No articles classified as **{label.replace('_',' ')}**.")
                    else:
                        st.dataframe(sub, use_container_width=True, height=280)
            st.markdown("<br>", unsafe_allow_html=True)
            # An empty frame would give a NaN mean and render as "nan%".
            has_conf = "confidence" in out.columns and len(out) > 0
            avg_c = out["confidence"].mean() if has_conf else 0
            hi = (out["confidence"] >= 0.9).sum() if has_conf else 0
            st.markdown(
                f'<p style="font-size:.8rem;color:var(--text-secondary);margin-bottom:1rem;">'
                f'Average confidence &nbsp;<strong style="color:var(--blue)">{avg_c:.1%}</strong>'
                f'&nbsp; · &nbsp;'
                f'High confidence ≥ 90% &nbsp;<strong style="color:var(--blue)">{hi}</strong>'
                f'</p>', unsafe_allow_html=True)
            st.download_button(
                "⬇ Download output.csv",
                data=out.to_csv(index=False).encode("utf-8"),
                file_name="output.csv", mime="text/csv",
            )
            st.markdown("</div></div>", unsafe_allow_html=True)
        elif not uploaded:
            # Nothing uploaded and nothing classified yet.
            st.markdown("""
<div class="empty-state rise r3">
<span class="empty-icon">◉</span>
<div class="empty-title">No file selected yet</div>
<div class="empty-sub">Upload your evaluation.csv above to begin</div>
</div>""", unsafe_allow_html=True)

    # ── RIGHT COLUMN ────────────────────────────────────────────────────────
    with col_R:
        st.markdown('<div class="card rise r2" style="position:sticky;top:72px">', unsafe_allow_html=True)
        st.markdown('<div class="card-body">', unsafe_allow_html=True)
        st.markdown('<span class="card-label">Reference</span>', unsafe_allow_html=True)
        st.markdown('<div class="card-title" style="margin-bottom:1.3rem">'
                    'Five News Categories</div>', unsafe_allow_html=True)
        for label, meta in CATS.items():
            st.markdown(f"""
<div class="cat-item">
<div class="cat-pip" style="background:{meta['color']}"></div>
<div class="cat-icon-box">{meta['icon']}</div>
<div>
<div class="cat-name">{label.replace('_',' ')}</div>
<div class="cat-desc">{meta['desc']}</div>
</div>
</div>""", unsafe_allow_html=True)
        if "out_df" in st.session_state:
            st.markdown('<hr class="div-gap">', unsafe_allow_html=True)
            st.markdown('<span class="card-label" style="color:var(--violet)">'
                        'Distribution Chart</span>', unsafe_allow_html=True)
            st.bar_chart(
                st.session_state["out_df"]["class"].value_counts(),
                use_container_width=True, height=190,
            )
        st.markdown("</div></div>", unsafe_allow_html=True)
    st.markdown("</div></div>", unsafe_allow_html=True)  # /section-inner /section-alt
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: Q&A ════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
elif pg == "qa":
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label s-label-green rise r1">Component 02 Β· Question-Answering</span>
<h2 class="s-h rise r2">Ask anything.<br>Get precise answers.</h2>
<p class="s-p rise r3">
Paste any news article and ask a natural language question.
The AI reads the passage and extracts an exact, source-referenced answer
β€” powered by deepset/roberta-base-squad2 (SQuAD 2.0).
</p>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
# Image banner
st.markdown("""
<div class="img-card rise r1">
<div class="img-card-bg"
style="background-image:url('https://images.unsplash.com/photo-1457369804613-52c61a468e7d?w=1400&q=80');
background-position:center 50%;">
</div>
<div class="img-card-overlay"></div>
<div class="img-card-body">
<span class="ic-tag">Extractive QA Β· RoBERTa Β· SQuAD 2.0</span>
<div class="ic-title">Intelligence That Reads Closely</div>
<div class="ic-sub">Ask in plain language Β· Get source-highlighted answers</div>
</div>
</div>
""", unsafe_allow_html=True)
col_qa, col_side = st.columns([3, 2], gap="large")
with col_qa:
st.markdown('<div class="card rise r2"><div class="card-body">', unsafe_allow_html=True)
st.markdown('<span class="card-label" style="color:var(--green)">'
'Input</span>', unsafe_allow_html=True)
st.markdown('<div class="card-title" style="margin-bottom:1.2rem">'
'Paste article &amp; ask</div>', unsafe_allow_html=True)
src = st.radio("Text Source",
["Paste article text", "Pick from classified results"],
horizontal=True, key="qa_src")
context = ""
if src == "Paste article text":
context = st.text_area(
"News Article",
height=210,
placeholder="Paste any Daily Mirror news article here…",
key="qa_ctx",
)
else:
if "out_df" not in st.session_state:
st.info("ℹ️ Run the **Classify** pipeline first to use this option.")
else:
out_df = st.session_state["out_df"]
sel_cat = st.selectbox(
"Filter Category",
["All"] + [l.replace("_"," ") for l in CATS],
key="qa_cat",
)
pool = (out_df if sel_cat == "All"
else out_df[out_df["class"].isin(
[sel_cat, sel_cat.replace(" ","_")])])
if not pool.empty:
idx = st.selectbox(
"Select Article",
pool.index.tolist(),
format_func=lambda i:
f"#{i} β€” {str(pool.loc[i,'content'])[:72]}…",
key="qa_idx",
)
row = pool.loc[idx]
context = str(row["content"])
lbl = row.get("class","")
meta = CATS.get(lbl, {"icon":"β—‰","color":"#1d1d1f","bg":"#f5f5f7"})
conf_v = row.get("confidence", None)
st.markdown(f"""
<div style="display:inline-flex;align-items:center;gap:6px;
background:{meta['bg']};
border:1px solid {meta['color']}30;
border-radius:var(--radius-pill);
padding:4px 14px;margin:.6rem 0 .9rem;
font-size:.78rem;font-weight:600;color:{meta['color']};">
{meta['icon']}&nbsp; {lbl.replace('_',' ')}
{f" &nbsp;Β·&nbsp; {conf_v:.1%}" if conf_v else ""}
</div>
<div style="background:var(--bg-secondary);border:1px solid var(--border);
border-radius:var(--radius-sm);padding:1rem 1.2rem;
font-size:.87rem;color:var(--text-secondary);
line-height:1.7;max-height:160px;overflow-y:auto;
margin-bottom:.8rem;">{context}</div>
""", unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True)
question = st.text_input(
"Your Question",
placeholder="e.g. Who announced the new policy?",
key="qa_q",
)
st.markdown("<br>", unsafe_allow_html=True)
if st.button("Extract Answer", key="run_qa"):
if not context.strip():
st.warning("⚠️ Please provide article text.")
elif not question.strip():
st.warning("⚠️ Please enter a question.")
else:
with st.spinner("Reading the passage…"):
qa_pipe = load_qa()
result = qa_pipe(question=question, context=context)
ans = result["answer"]
score = result["score"]
s, e = result["start"], result["end"]
highlighted = (
context[:s]
+ f'<mark style="background:#dbeafe;color:#1d4ed8;'
f'padding:0 3px;border-radius:3px;font-weight:500;">'
f'{context[s:e]}</mark>'
+ context[e:]
)
st.markdown(f"""
<div class="answer-wrap">
<span class="answer-chip">Answer</span>
<div class="answer-text">{ans}</div>
<div class="answer-meta">
Confidence &nbsp;<strong>{score:.1%}</strong>
&nbsp;Β·&nbsp; deepset/roberta-base-squad2
</div>
</div>""", unsafe_allow_html=True)
with st.expander("View highlighted source context"):
st.markdown(
f'<div style="font-size:.87rem;line-height:1.8;'
f'color:var(--text-secondary);">{highlighted}</div>',
unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
with col_side:
st.markdown('<div class="card rise r2"><div class="card-body">', unsafe_allow_html=True)
st.markdown('<span class="card-label" style="color:var(--green)">'
'Tips</span>', unsafe_allow_html=True)
st.markdown('<div class="card-title" style="margin-bottom:1rem">'
'Better questions,<br>better answers</div>', unsafe_allow_html=True)
for i, (t, d) in enumerate([
("Who Β· What Β· When Β· Where",
"Factual questions extract the sharpest answers"),
("Provide full context",
"Longer passages give the model more evidence to work from"),
("Stay specific",
"Narrow, focused questions outperform vague ones every time"),
("Full sentence questions",
"Questions ending with '?' consistently perform best"),
("Avoid yes / no",
"Open-ended questions return richer, more informative answers"),
]):
st.markdown(f"""
<div class="tip-row">
<span class="tip-num">{i+1:02}</span>
<div>
<div class="tip-title">{t}</div>
<div class="tip-body">{d}</div>
</div>
</div>""", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
st.markdown('<div class="card rise r3" style="margin-top:1rem">', unsafe_allow_html=True)
st.markdown('<div class="card-body">', unsafe_allow_html=True)
st.markdown('<span class="card-label">Model</span>', unsafe_allow_html=True)
for k, v in [
("Architecture", "RoBERTa Base"),
("Training Data", "SQuAD 2.0"),
("Task Type", "Extractive Q&A"),
("Provider", "deepset Β· Hugging Face"),
]:
st.markdown(f"""
<div style="display:flex;justify-content:space-between;align-items:center;
padding:9px 0;border-bottom:1px solid var(--border);
font-size:.83rem;">
<span style="color:var(--text-secondary);font-weight:400;">{k}</span>
<span style="font-weight:500;color:var(--text-primary);">{v}</span>
</div>""", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: INSIGHTS ═══════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
elif pg == "insights":
# Dark hero section
st.markdown("""
<div class="section-dark">
<div class="section-inner">
<span class="s-label s-label-violet rise r1"
style="color:rgba(167,139,250,.85);">
Component 03 Β· Visual Insights
</span>
<h2 class="s-h s-h-light rise r2">Clarity from<br>every angle.</h2>
<p class="s-p s-p-light rise r3">
Distribution breakdowns, word clouds, confidence analysis,
and article spotlights β€” everything you need to understand
your classified corpus at a glance.
</p>
</div>
</div>
<hr class="div-line">
""", unsafe_allow_html=True)
if "out_df" not in st.session_state:
st.markdown("""
<div class="section"><div class="section-inner">
<div class="empty-state">
<span class="empty-icon">β—ˆ</span>
<div class="empty-title">No classified data yet</div>
<div class="empty-sub">
Run the <strong>Classify</strong> pipeline first,
then return here for visual insights.
</div>
</div>
</div></div>""", unsafe_allow_html=True)
st.stop()
out_df = st.session_state["out_df"]
total = len(out_df)
counts = out_df["class"].value_counts()
# ── Section A: Distribution ──────────────────────────────────────────
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">01 Β· Distribution</span>
<h2 class="s-h rise r2" style="font-size:2.2rem;margin-bottom:.5rem;">
How your corpus breaks down.
</h2>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
col_da, col_db = st.columns([2, 3], gap="large")
with col_da:
st.markdown('<div class="card rise r1"><div class="card-body">', unsafe_allow_html=True)
st.markdown('<span class="card-label">Breakdown</span>', unsafe_allow_html=True)
for label, meta in CATS.items():
n = counts.get(label, 0)
pct = n / total if total > 0 else 0
st.markdown(f"""
<div style="display:flex;align-items:center;gap:12px;margin-bottom:14px;">
<span style="font-size:1.1rem;width:24px;text-align:center">{meta['icon']}</span>
<div style="flex:1">
<div style="display:flex;justify-content:space-between;
font-size:.82rem;font-weight:500;
color:var(--text-primary);margin-bottom:5px;">
<span>{label.replace('_',' ')}</span>
<span style="color:{meta['color']};font-weight:600;">
{n} Β· {pct:.0%}
</span>
</div>
<div class="conf-bg">
<div class="conf-fg"
style="width:{pct*100:.1f}%;background:{meta['color']}">
</div>
</div>
</div>
</div>""", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
with col_db:
try:
import plotly.express as px
cdf = counts.reset_index()
cdf.columns = ["Category","Count"]
cdf["Label"] = cdf["Category"].str.replace("_"," ")
cmap = {k: CATS[k]["color"] for k in CATS}
fig = px.bar(cdf, x="Label", y="Count", color="Category",
color_discrete_map=cmap, text="Count",
labels={"Label":"","Count":""})
fig.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=12,color="#1d1d1f"),
showlegend=False,margin=dict(l=0,r=0,t=10,b=0),
xaxis=dict(showgrid=False,color="#86868b",
tickfont=dict(size=11,color="#6e6e73")),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
)
fig.update_traces(textposition="outside",
textfont=dict(size=12,color="#1d1d1f"),
marker_line_width=0,
marker_corner_radius=6)
st.plotly_chart(fig, use_container_width=True)
except ImportError:
st.bar_chart(counts, use_container_width=True, height=270)
st.markdown("</div></div>", unsafe_allow_html=True)
# ── Section B: Word Cloud ────────────────────────────────────────────
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">02 Β· Word Cloud</span>
<h2 class="s-h rise r2" style="font-size:2.2rem;margin-bottom:.5rem;">
The language of the news.
</h2>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
col_wl, col_wr = st.columns([2, 3], gap="large")
with col_wl:
st.markdown('<div class="card rise r1"><div class="card-body">', unsafe_allow_html=True)
st.markdown('<span class="card-label">Configure</span>', unsafe_allow_html=True)
st.markdown('<div class="card-title" style="margin-bottom:1rem">'
'Build word cloud</div>', unsafe_allow_html=True)
wc_sel = st.selectbox("Category Filter",
["All"]+[l.replace("_"," ") for l in CATS],
key="wc_cat")
wc_n = st.slider("Number of Words", 20, 120, 70, key="wc_n")
st.markdown("<br>", unsafe_allow_html=True)
if st.button("Generate Word Cloud", key="run_wc"):
lbl = wc_sel.replace(" ","_") if wc_sel != "All" else "All"
corpus = (" ".join(out_df["content"].fillna("").tolist()) if lbl == "All"
else " ".join(
out_df[out_df["class"].isin([lbl,wc_sel])]["content"]
.fillna("").tolist()))
try:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
accent = CATS.get(lbl,{}).get("color","#0071e3")
processed = preprocess(corpus)
def _cf(*a,**k):
r,g,b = mcolors.to_rgb(accent)
f = random.uniform(.45,1.)
return f"rgb({int(r*f*255)},{int(g*f*255)},{int(b*f*255)})"
wc = WordCloud(width=900,height=360,
background_color="white",
color_func=_cf,max_words=wc_n,
prefer_horizontal=.82).generate(processed)
fig_wc,ax = plt.subplots(figsize=(12,4))
ax.imshow(wc,interpolation="bilinear"); ax.axis("off")
fig_wc.patch.set_facecolor("white"); plt.tight_layout(pad=0)
st.session_state["wc_fig"] = fig_wc
st.session_state["wc_html"] = None
except ImportError:
st.session_state["wc_html"] = word_cloud_html(preprocess(corpus), wc_n)
st.session_state["wc_fig"] = None
st.markdown("</div></div>", unsafe_allow_html=True)
with col_wr:
st.markdown('<div class="wc-wrap rise r1">', unsafe_allow_html=True)
st.markdown('<span class="card-label" style="display:block;margin-bottom:.8rem">'
'Word Frequency Canvas</span>', unsafe_allow_html=True)
if st.session_state.get("wc_fig"):
import matplotlib.pyplot as plt
st.pyplot(st.session_state["wc_fig"])
elif st.session_state.get("wc_html"):
st.markdown(st.session_state["wc_html"], unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align:center;padding:5rem 1rem;">
<div style="font-size:3rem;opacity:.12;margin-bottom:1rem">β—Ž</div>
<div style="font-size:.95rem;color:var(--text-tertiary);">
Configure and generate your word cloud
</div>
</div>""", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
# ── Section C: Confidence ────────────────────────────────────────────
if "confidence" in out_df.columns:
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">03 Β· Confidence Analysis</span>
<h2 class="s-h rise r2" style="font-size:2.2rem;margin-bottom:.5rem;">
How certain is the model?
</h2>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
c1,c2,c3 = st.columns(3, gap="large")
for col,(val,lbl,color) in zip([c1,c2,c3],[
(f"{out_df['confidence'].mean():.1%}","Average Confidence","#0071e3"),
(str((out_df["confidence"]>=.9).sum()),"High Confidence β‰₯ 90%","#34c759"),
(str((out_df["confidence"]<.7).sum()), "Low Confidence < 70%", "#ff3b30"),
]):
with col:
st.markdown(f"""
<div class="metric-card rise r1">
<div class="metric-val" style="color:{color}">{val}</div>
<div class="metric-lbl">{lbl}</div>
</div>""", unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True)
try:
import plotly.express as px
cmap = {k: CATS[k]["color"] for k in CATS}
fig2 = px.histogram(out_df,x="confidence",color="class",
nbins=25,color_discrete_map=cmap,
labels={"confidence":"Confidence Score","class":""})
fig2.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=11,color="#1d1d1f"),
margin=dict(l=0,r=0,t=10,b=0),bargap=.06,
xaxis=dict(showgrid=False,color="#86868b"),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
legend=dict(bgcolor="white",bordercolor="#e2e2e7",borderwidth=1,
font=dict(size=11)),
)
st.plotly_chart(fig2, use_container_width=True)
except ImportError:
st.dataframe(out_df.groupby("class")["confidence"].describe().round(3),
use_container_width=True)
st.markdown("</div></div>", unsafe_allow_html=True)
# ── Section D: Article Length ────────────────────────────────────────
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">04 Β· Article Length</span>
<h2 class="s-h rise r2" style="font-size:2.2rem;margin-bottom:.5rem;">
Word count by category.
</h2>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
out_df["word_count"] = out_df["content"].fillna("").apply(lambda x: len(x.split()))
try:
import plotly.express as px
cmap = {k: CATS[k]["color"] for k in CATS}
fig3 = px.box(out_df,x="class",y="word_count",color="class",
color_discrete_map=cmap,points="outliers",
labels={"class":"","word_count":"Word Count"})
fig3.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=11,color="#1d1d1f"),
showlegend=False,margin=dict(l=0,r=0,t=10,b=0),
xaxis=dict(showgrid=False,color="#86868b",
tickfont=dict(size=11,color="#6e6e73")),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
)
st.plotly_chart(fig3, use_container_width=True)
except ImportError:
st.dataframe(out_df.groupby("class")["word_count"].describe().round(1),
use_container_width=True)
st.markdown("</div></div>", unsafe_allow_html=True)
# ── Section E: Spotlight ─────────────────────────────────────────────
st.markdown("""
<div class="section">
<div class="section-inner">
<span class="s-label rise r1">05 Β· Article Spotlight</span>
<h2 class="s-h rise r2" style="font-size:2.2rem;margin-bottom:.5rem;">
Discover a random article.
</h2>
</div>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="section-alt"><div class="section-inner">', unsafe_allow_html=True)
if st.button("Shuffle Article", key="spot"):
row = out_df.sample(1).iloc[0]
label = row.get("class","")
meta = CATS.get(label, {"icon":"β—‰","color":"#1d1d1f","bg":"#f5f5f7"})
conf_v = row.get("confidence", None)
text = str(row["content"])
wc_c = len(text.split())
st.markdown(f"""
<div class="spotlight" style="border-top:2px solid {meta['color']};">
<div style="display:flex;align-items:center;gap:8px;
margin-bottom:1.3rem;flex-wrap:wrap;">
<span class="spot-badge"
style="color:{meta['color']};border-color:{meta['color']}30;
background:{meta['bg']};">
{meta['icon']}&nbsp; {label.replace('_',' ')}
</span>
{f'<span class="spot-badge" style="color:var(--text-secondary);border-color:var(--border);background:var(--bg-secondary);">{conf_v:.1%} confidence</span>' if conf_v else ""}
<span class="spot-badge"
style="color:var(--text-secondary);border-color:var(--border);
background:var(--bg-secondary);">
{wc_c} words
</span>
</div>
<div class="spot-text">
{text[:640]}{"…" if len(text)>640 else ""}
</div>
</div>""", unsafe_allow_html=True)
st.markdown("</div></div>", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ FOOTER ═══════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# Footer: static branding, technology labels and copyright line.
st.markdown("""
<div id="footer">
<div class="footer-inner">
<div class="footer-top">
<div>
<div class="footer-brand">◉ News<span>Lens</span> AI</div>
<div class="footer-tagline">
Daily Mirror · AI Intelligence · DA3111 Text Analytics
</div>
</div>
<div class="footer-links">
<span class="footer-link">Streamlit</span>
<span class="footer-link">Hugging Face</span>
<span class="footer-link">Transformers</span>
<span class="footer-link">NLTK</span>
<span class="footer-link">Plotly</span>
</div>
</div>
<div style="display:flex;justify-content:space-between;flex-wrap:wrap;gap:8px;">
<p class="footer-copy">
Copyright © 2026 NewsLens AI. DA3111 Text Analytics Assignment 01.
</p>
<p class="footer-copy">
Model: Akilashamnaka12/news_classifier_model
</p>
</div>
</div>
</div>
""", unsafe_allow_html=True)