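"""
Project S.T.A.R.K AI - Context-Aware Fraud Detection dashboard for UIDAI.

Streamlit app with three views: a geographic risk heatmap of Aadhaar centers,
a priority investigation list (Risk Score > 75), and pattern analytics
(ratio deviation and risk-score distribution). Loads analyzed_aadhaar_data.csv
when available and falls back to generated dummy data otherwise, so the
dashboard runs end-to-end without the real dataset.
"""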
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import zlib  # stable 32-bit hash for deterministic district offsets (see get_coords)
from datetime import datetime
# 1. PAGE CONFIGURATION
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="",
    layout="wide",
    initial_sidebar_state="expanded"
)
# 2. PROFESSIONAL STYLING (THEME OVERRIDE)
st.markdown("""
<style>
/* IMPORT FONTS */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* FORCE LIGHT THEME BACKGROUNDS & TEXT */
.stApp {
background-color: #f8fafc; /* Light Blue-Grey */
color: #0f172a; /* Slate 900 */
font-family: 'Inter', sans-serif;
}
/* METRIC CARDS - GLASSMORPHISM */
div[data-testid="stMetric"] {
background-color: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 8px;
padding: 15px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
transition: transform 0.2s;
}
div[data-testid="stMetric"]:hover {
transform: translateY(-2px);
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
}
/* FORCE DARK TEXT FOR METRICS (Fixes White-on-White) */
div[data-testid="stMetricValue"] {
color: #0f172a !important;
font-weight: 700 !important;
}
div[data-testid="stMetricLabel"] {
color: #64748b !important; /* Slate 500 */
}
/* DATAFRAME STYLING (Fixes White-on-White) */
div[data-testid="stDataFrame"] div[role="grid"] {
color: #334155 !important; /* Slate 700 */
background-color: white !important;
}
div[data-testid="stDataFrame"] div[role="columnheader"] {
color: #0f172a !important;
font-weight: 600 !important;
background-color: #f1f5f9 !important;
}
/* SIDEBAR STYLING */
[data-testid="stSidebar"] {
background-color: #1e293b; /* Slate 800 */
}
[data-testid="stSidebar"] * {
color: #f8fafc !important; /* Light text for sidebar */
}
[data-testid="stSidebar"] .stSelectbox label,
[data-testid="stSidebar"] .stMultiSelect label {
color: #94a3b8 !important;
}
/* HEADERS */
h1, h2, h3 {
color: #0f172a !important;
font-weight: 700 !important;
}
/* CUSTOM BADGES */
.status-badge {
display: inline-flex;
align-items: center;
padding: 4px 12px;
border-radius: 9999px;
font-size: 12px;
font-weight: 600;
}
.bg-red { background-color: #fee2e2; color: #991b1b; }
.bg-green { background-color: #dcfce7; color: #166534; }
/* MAP CANVAS FIX */
.js-plotly-plot .plotly .main-svg {
background-color: rgba(0,0,0,0) !important;
}
</style>
""", unsafe_allow_html=True)
# 3. SMART DATA LOADING (MAPPING)
@st.cache_data
def load_data():
    # 1. Load or Generate Data
    try:
        df = pd.read_csv('analyzed_aadhaar_data.csv')
    except FileNotFoundError:
        # Dummy data generator if the file is missing
        dates = pd.date_range(start="2025-01-01", periods=200)
        df = pd.DataFrame({
            'date': dates,
            'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Tamil Nadu', 'Gujarat', 'Rajasthan', 'Kerala'], 200),
            'district': np.random.choice(['North', 'South', 'East', 'West', 'Central', 'Rural A', 'Urban B'], 200),
            'pincode': np.random.randint(110001, 800000, 200),
            'RISK_SCORE': np.random.uniform(15, 99, 200),
            'total_activity': np.random.randint(50, 800, 200),
            'enrol_adult': np.random.randint(10, 400, 200),
            'ratio_deviation': np.random.uniform(-0.15, 0.6, 200),
            'is_weekend': np.random.choice([0, 1], 200, p=[0.7, 0.3])
        })

    # Standardize Date
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # SMART GEO-CLUSTERING LOGIC
    # Comprehensive Center Points for Indian States & UTs
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586),
        'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278),
        'Assam': (26.2006, 92.9376),
        'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794),
        'Chhattisgarh': (21.2787, 81.8661),
        'Dadra and Nagar Haveli and Daman and Diu': (20.4283, 72.8397),
        'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240),
        'Gujarat': (22.2587, 71.1924),
        'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095),
        'Jammu and Kashmir': (33.7782, 76.5762),
        'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139),
        'Kerala': (10.8505, 76.2711),
        'Ladakh': (34.1526, 77.5770),
        'Lakshadweep': (10.5667, 72.6417),
        'Madhya Pradesh': (22.9734, 78.6569),
        'Maharashtra': (19.7515, 75.7139),
        'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662),
        'Mizoram': (23.1645, 92.9376),
        'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985),
        'Puducherry': (11.9416, 79.8083),
        'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179),
        'Sikkim': (27.5330, 88.5122),
        'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193),
        'Tripura': (23.9408, 91.9882),
        'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193),
        'West Bengal': (22.9868, 87.8550)
    }

    def get_coords(row):
        state = row.get('state', 'Delhi')
        district = str(row.get('district', 'Unknown'))

        # 1. Get State Base Coordinates (default to the center of India)
        base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))

        # 2. DETERMINISTIC HASHING FOR DISTRICT
        # zlib.crc32 is stable across runs (unlike the built-in hash(), which is
        # salted per process), so "District A" is ALWAYS in the same spot relative
        # to the State Center - distinct clusters instead of random noise.
        district_hash = zlib.crc32((state + district).encode('utf-8'))
        np.random.seed(district_hash % 2**32)
        # Offset the district center by up to 1.5 degrees (~150 km) from the state center
        dist_lat_offset = np.random.uniform(-1.5, 1.5)
        dist_lon_offset = np.random.uniform(-1.5, 1.5)

        # 3. INDIVIDUAL CENTER JITTER
        # Add tiny random noise (~4 km) so points don't stack perfectly.
        # Re-seed with None to get fresh randomness for the jitter.
        np.random.seed(None)
        noise_lat = np.random.normal(0, 0.04)
        noise_lon = np.random.normal(0, 0.04)

        return pd.Series({
            'lat': base_lat + dist_lat_offset + noise_lat,
            'lon': base_lon + dist_lon_offset + noise_lon
        })

    # Apply coordinates
    coords = df.apply(get_coords, axis=1)
    df['lat'] = coords['lat']
    df['lon'] = coords['lon']

    # Risk Categories
    df['risk_category'] = pd.cut(
        df['RISK_SCORE'],
        bins=[-1, 50, 75, 85, 100],
        labels=['Low', 'Medium', 'High', 'Critical']
    )
    return df
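
# NOTE: assumed input schema, inferred from the fields this script uses; adjust
# if the real analyzed_aadhaar_data.csv differs:
#   date, state, district, pincode, RISK_SCORE (0-100), total_activity,
#   enrol_adult, ratio_deviation, is_weekend (0/1)
# lat, lon and risk_category are derived inside load_data().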
# Load Data
df = load_data()
# 4. SIDEBAR & FILTERS
with st.sidebar:
    st.markdown("### S.T.A.R.K AI Control")
    st.markdown("---")

    # State Filter
    state_list = ['All'] + sorted(df['state'].unique().tolist())
    selected_state = st.selectbox("Select State", state_list)

    # District Filter
    if selected_state != 'All':
        filtered_df = df[df['state'] == selected_state]
        district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
    else:
        filtered_df = df.copy()
        district_list = ['All']

    selected_district = st.selectbox("Select District", district_list)
    if selected_district != 'All':
        filtered_df = filtered_df[filtered_df['district'] == selected_district]

    st.markdown("---")

    # Risk Filter
    risk_filter = st.multiselect(
        "Risk Level",
        options=['Low', 'Medium', 'High', 'Critical'],
        default=['High', 'Critical']
    )
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]

    st.markdown("---")

    # Links
    st.markdown("**Resources**")
    st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing")

    st.markdown("---")
    st.info("**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
# 5. HEADER & KPI METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("Project S.T.A.R.K AI Dashboard")
    st.markdown("Context-Aware Fraud Detection System")
with col2:
    st.markdown("""
        <div style="text-align: right; padding-top: 20px;">
            <span class="status-badge bg-green">System Online</span>
            <div style="font-size: 12px; color: #64748b; margin-top: 5px;">Live Monitor</div>
        </div>
    """, unsafe_allow_html=True)
st.markdown("---")
# METRICS ROW
m1, m2, m3, m4 = st.columns(4)
total_centers = len(filtered_df)
high_risk = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
avg_risk = filtered_df['RISK_SCORE'].mean() if not filtered_df.empty else 0
weekend_alerts = len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])
m1.metric("Total Centers", f"{total_centers:,}", border=True)
m2.metric("High Risk Alerts", f"{high_risk}", delta="Action Required", delta_color="inverse", border=True)
m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
m4.metric("Weekend Spikes", f"{weekend_alerts}", "Unauthorized", delta_color="off", border=True)
st.markdown("##") # Spacer
# 6. MAIN TABS
tab_map, tab_list, tab_charts = st.tabs(["Geographic Risk", "Priority List", "Pattern Analytics"])
# TAB 1: GEOGRAPHIC RISK (MAP)
with tab_map:
    col_map, col_details = st.columns([3, 1])

    with col_map:
        if not filtered_df.empty:
            # Open-Street-Map tiles: good contrast and no access token required
            fig_map = px.scatter_mapbox(
                filtered_df,
                lat="lat",
                lon="lon",
                color="RISK_SCORE",
                size="total_activity",
                # Traffic-light colors: green -> yellow -> red
                color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],
                size_max=20,
                zoom=4.5 if selected_state != 'All' else 3.5,
                center={"lat": 22.0, "lon": 80.0},  # Center of India
                hover_name="pincode",
                hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
                mapbox_style="open-street-map",
                height=600,
                title="<b>Live Fraud Risk Heatmap</b>"
            )
            fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig_map, use_container_width=True)
        else:
            st.warning("No data matches current filters.")

    with col_details:
        st.subheader("Top Hotspots")
        if not filtered_df.empty:
            top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
            for district, score in top_districts.items():
                # Color-code the card's left border by severity
                color = "#ef4444" if score > 80 else "#f59e0b"
                st.markdown(f"""
                    <div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
                        <div style="font-weight: 600; color: #1e293b;">{district}</div>
                        <div style="font-size: 13px; color: #64748b;">Avg Risk: <b>{score:.1f}</b></div>
                    </div>
                """, unsafe_allow_html=True)
# TAB 2: PRIORITY LIST (DATAFRAME)
with tab_list:
    st.subheader("Target Investigation List")
    st.markdown("Filter: *Showing centers with Risk Score > 75*")

    target_list = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)

    st.dataframe(
        target_list[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
        column_config={
            "RISK_SCORE": st.column_config.ProgressColumn(
                "Risk Probability",
                help="Probability of fraud based on context analysis",
                format="%d%%",
                min_value=0,
                max_value=100,
            ),
            "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
            "total_activity": st.column_config.NumberColumn("Volume"),
            "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
        },
        use_container_width=True,
        hide_index=True,
        height=400
    )

    # Export Button
    csv = target_list.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Download CSV",
        data=csv,
        file_name="uidai_stark_ai_priority_list.csv",
        mime="text/csv",
        type="primary"
    )
# TAB 3: PATTERN ANALYTICS (CHARTS)
with tab_charts:
    c1, c2 = st.columns(2)

    with c1:
        st.subheader("Ghost ID Pattern (Ratio Deviation)")
        # Scatter plot: daily activity volume vs. deviation from the district baseline
        fig_scatter = px.scatter(
            filtered_df,
            x="total_activity",
            y="ratio_deviation",
            color="risk_category",
            color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
            title="Deviation from District Baseline",
            labels={"ratio_deviation": "Deviation Score", "total_activity": "Daily Transactions"},
            hover_data=['pincode', 'district']
        )
        fig_scatter.add_hline(y=0.2, line_dash="dash", line_color="red", annotation_text="Fraud Threshold")
        st.plotly_chart(fig_scatter, use_container_width=True)

    with c2:
        st.subheader("Risk Distribution")
        # Histogram of risk scores across the filtered centers
        fig_hist = px.histogram(
            filtered_df,
            x="RISK_SCORE",
            nbins=20,
            color_discrete_sequence=['#3b82f6'],
            title="Frequency of Risk Scores"
        )
        fig_hist.update_layout(bargap=0.1)
        st.plotly_chart(fig_hist, use_container_width=True)
# 7. FOOTER
st.markdown("---")
st.markdown("""
<div style="text-align: center; font-size: 13px; color: #94a3b8;">
<b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026 | Team UIDAI_4571<br>
<i>Confidential - For Official Use Only</i>
</div>
""", unsafe_allow_html=True)