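"""
Project S.T.A.R.K AI - Context-Aware Fraud Detection dashboard for UIDAI.

Streamlit app with three views: a geographic risk heatmap of Aadhaar centers,
a priority investigation list (Risk Score > 75), and pattern analytics
(ratio deviation and risk-score distribution). Loads analyzed_aadhaar_data.csv
when available and falls back to generated dummy data otherwise, so the
dashboard runs end-to-end without the real dataset.
"""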
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import zlib  # stable 32-bit hash for deterministic district offsets (see get_coords)
from datetime import datetime
# 1. PAGE CONFIGURATION
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="",
    layout="wide",
    initial_sidebar_state="expanded"
)
# 2. PROFESSIONAL STYLING (THEME OVERRIDE)
st.markdown("""
<style>
/* IMPORT FONTS */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* FORCE LIGHT THEME BACKGROUNDS & TEXT */
.stApp {
background-color: #f8fafc; /* Light Blue-Grey */
color: #0f172a; /* Slate 900 */
font-family: 'Inter', sans-serif;
}
/* METRIC CARDS - GLASSMORPHISM */
div[data-testid="stMetric"] {
background-color: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 8px;
padding: 15px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
transition: transform 0.2s;
}
div[data-testid="stMetric"]:hover {
transform: translateY(-2px);
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
}
/* FORCE DARK TEXT FOR METRICS (Fixes White-on-White) */
div[data-testid="stMetricValue"] {
color: #0f172a !important;
font-weight: 700 !important;
}
div[data-testid="stMetricLabel"] {
color: #64748b !important; /* Slate 500 */
}
/* DATAFRAME STYLING (Fixes White-on-White) */
div[data-testid="stDataFrame"] div[role="grid"] {
color: #334155 !important; /* Slate 700 */
background-color: white !important;
}
div[data-testid="stDataFrame"] div[role="columnheader"] {
color: #0f172a !important;
font-weight: 600 !important;
background-color: #f1f5f9 !important;
}
/* SIDEBAR STYLING */
[data-testid="stSidebar"] {
background-color: #1e293b; /* Slate 800 */
}
[data-testid="stSidebar"] * {
color: #f8fafc !important; /* Light text for sidebar */
}
[data-testid="stSidebar"] .stSelectbox label,
[data-testid="stSidebar"] .stMultiSelect label {
color: #94a3b8 !important;
}
/* HEADERS */
h1, h2, h3 {
color: #0f172a !important;
font-weight: 700 !important;
}
/* CUSTOM BADGES */
.status-badge {
display: inline-flex;
align-items: center;
padding: 4px 12px;
border-radius: 9999px;
font-size: 12px;
font-weight: 600;
}
.bg-red { background-color: #fee2e2; color: #991b1b; }
.bg-green { background-color: #dcfce7; color: #166534; }
/* MAP CANVAS FIX */
.js-plotly-plot .plotly .main-svg {
background-color: rgba(0,0,0,0) !important;
}
</style>
""", unsafe_allow_html=True)
# 3. SMART DATA LOADING (MAPPING)
@st.cache_data
def load_data():
    # 1. Load or Generate Data
    try:
        df = pd.read_csv('analyzed_aadhaar_data.csv')
    except FileNotFoundError:
        # Dummy data generator if the file is missing
        dates = pd.date_range(start="2025-01-01", periods=200)
        df = pd.DataFrame({
            'date': dates,
            'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Tamil Nadu', 'Gujarat', 'Rajasthan', 'Kerala'], 200),
            'district': np.random.choice(['North', 'South', 'East', 'West', 'Central', 'Rural A', 'Urban B'], 200),
            'pincode': np.random.randint(110001, 800000, 200),
            'RISK_SCORE': np.random.uniform(15, 99, 200),
            'total_activity': np.random.randint(50, 800, 200),
            'enrol_adult': np.random.randint(10, 400, 200),
            'ratio_deviation': np.random.uniform(-0.15, 0.6, 200),
            'is_weekend': np.random.choice([0, 1], 200, p=[0.7, 0.3])
        })

    # Standardize Date
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # SMART GEO-CLUSTERING LOGIC
    # Comprehensive Center Points for Indian States & UTs
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586),
        'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278),
        'Assam': (26.2006, 92.9376),
        'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794),
        'Chhattisgarh': (21.2787, 81.8661),
        'Dadra and Nagar Haveli and Daman and Diu': (20.4283, 72.8397),
        'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240),
        'Gujarat': (22.2587, 71.1924),
        'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095),
        'Jammu and Kashmir': (33.7782, 76.5762),
        'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139),
        'Kerala': (10.8505, 76.2711),
        'Ladakh': (34.1526, 77.5770),
        'Lakshadweep': (10.5667, 72.6417),
        'Madhya Pradesh': (22.9734, 78.6569),
        'Maharashtra': (19.7515, 75.7139),
        'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662),
        'Mizoram': (23.1645, 92.9376),
        'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985),
        'Puducherry': (11.9416, 79.8083),
        'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179),
        'Sikkim': (27.5330, 88.5122),
        'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193),
        'Tripura': (23.9408, 91.9882),
        'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193),
        'West Bengal': (22.9868, 87.8550)
    }

    def get_coords(row):
        state = row.get('state', 'Delhi')
        district = str(row.get('district', 'Unknown'))

        # 1. Get State Base Coordinates (default to the center of India)
        base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))

        # 2. DETERMINISTIC HASHING FOR DISTRICT
        # zlib.crc32 is stable across runs (unlike the built-in hash(), which is
        # salted per process), so "District A" is ALWAYS in the same spot relative
        # to the State Center - distinct clusters instead of random noise.
        district_hash = zlib.crc32((state + district).encode('utf-8'))
        np.random.seed(district_hash % 2**32)
        # Offset the district center by up to 1.5 degrees (~150 km) from the state center
        dist_lat_offset = np.random.uniform(-1.5, 1.5)
        dist_lon_offset = np.random.uniform(-1.5, 1.5)

        # 3. INDIVIDUAL CENTER JITTER
        # Add tiny random noise (~4 km) so points don't stack perfectly.
        # Re-seed with None to get fresh randomness for the jitter.
        np.random.seed(None)
        noise_lat = np.random.normal(0, 0.04)
        noise_lon = np.random.normal(0, 0.04)

        return pd.Series({
            'lat': base_lat + dist_lat_offset + noise_lat,
            'lon': base_lon + dist_lon_offset + noise_lon
        })

    # Apply coordinates
    coords = df.apply(get_coords, axis=1)
    df['lat'] = coords['lat']
    df['lon'] = coords['lon']

    # Risk Categories
    df['risk_category'] = pd.cut(
        df['RISK_SCORE'],
        bins=[-1, 50, 75, 85, 100],
        labels=['Low', 'Medium', 'High', 'Critical']
    )
    return df
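
# NOTE: assumed input schema, inferred from the fields this script uses; adjust
# if the real analyzed_aadhaar_data.csv differs:
#   date, state, district, pincode, RISK_SCORE (0-100), total_activity,
#   enrol_adult, ratio_deviation, is_weekend (0/1)
# lat, lon and risk_category are derived inside load_data().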
# Load Data
df = load_data()
# 4. SIDEBAR & FILTERS
with st.sidebar:
    st.markdown("### S.T.A.R.K AI Control")
    st.markdown("---")

    # State Filter
    state_list = ['All'] + sorted(df['state'].unique().tolist())
    selected_state = st.selectbox("Select State", state_list)

    # District Filter
    if selected_state != 'All':
        filtered_df = df[df['state'] == selected_state]
        district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
    else:
        filtered_df = df.copy()
        district_list = ['All']

    selected_district = st.selectbox("Select District", district_list)
    if selected_district != 'All':
        filtered_df = filtered_df[filtered_df['district'] == selected_district]

    st.markdown("---")

    # Risk Filter
    risk_filter = st.multiselect(
        "Risk Level",
        options=['Low', 'Medium', 'High', 'Critical'],
        default=['High', 'Critical']
    )
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]

    st.markdown("---")

    # Links
    st.markdown("**Resources**")
    st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing")

    st.markdown("---")
    st.info("**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
# 5. HEADER & KPI METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("Project S.T.A.R.K AI Dashboard")
    st.markdown("Context-Aware Fraud Detection System")
with col2:
    st.markdown("""
        <div style="text-align: right; padding-top: 20px;">
            <span class="status-badge bg-green">System Online</span>
            <div style="font-size: 12px; color: #64748b; margin-top: 5px;">Live Monitor</div>
        </div>
    """, unsafe_allow_html=True)
st.markdown("---")
# METRICS ROW
m1, m2, m3, m4 = st.columns(4)
total_centers = len(filtered_df)
high_risk = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
avg_risk = filtered_df['RISK_SCORE'].mean() if not filtered_df.empty else 0
weekend_alerts = len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])
m1.metric("Total Centers", f"{total_centers:,}", border=True)
m2.metric("High Risk Alerts", f"{high_risk}", delta="Action Required", delta_color="inverse", border=True)
m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
m4.metric("Weekend Spikes", f"{weekend_alerts}", "Unauthorized", delta_color="off", border=True)
st.markdown("##") # Spacer
# 6. MAIN TABS
tab_map, tab_list, tab_charts = st.tabs(["Geographic Risk", "Priority List", "Pattern Analytics"])
# TAB 1: GEOGRAPHIC RISK (MAP)
with tab_map:
    col_map, col_details = st.columns([3, 1])

    with col_map:
        if not filtered_df.empty:
            # Open-Street-Map tiles: good contrast and no access token required
            fig_map = px.scatter_mapbox(
                filtered_df,
                lat="lat",
                lon="lon",
                color="RISK_SCORE",
                size="total_activity",
                # Traffic-light colors: green -> yellow -> red
                color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],
                size_max=20,
                zoom=4.5 if selected_state != 'All' else 3.5,
                center={"lat": 22.0, "lon": 80.0},  # Center of India
                hover_name="pincode",
                hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
                mapbox_style="open-street-map",
                height=600,
                title="<b>Live Fraud Risk Heatmap</b>"
            )
            fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig_map, use_container_width=True)
        else:
            st.warning("No data matches current filters.")

    with col_details:
        st.subheader("Top Hotspots")
        if not filtered_df.empty:
            top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
            for district, score in top_districts.items():
                # Color-code the card's left border by severity
                color = "#ef4444" if score > 80 else "#f59e0b"
                st.markdown(f"""
                    <div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
                        <div style="font-weight: 600; color: #1e293b;">{district}</div>
                        <div style="font-size: 13px; color: #64748b;">Avg Risk: <b>{score:.1f}</b></div>
                    </div>
                """, unsafe_allow_html=True)
# TAB 2: PRIORITY LIST (DATAFRAME)
with tab_list:
    st.subheader("Target Investigation List")
    st.markdown("Filter: *Showing centers with Risk Score > 75*")

    target_list = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)

    st.dataframe(
        target_list[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
        column_config={
            "RISK_SCORE": st.column_config.ProgressColumn(
                "Risk Probability",
                help="Probability of fraud based on context analysis",
                format="%d%%",
                min_value=0,
                max_value=100,
            ),
            "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
            "total_activity": st.column_config.NumberColumn("Volume"),
            "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
        },
        use_container_width=True,
        hide_index=True,
        height=400
    )

    # Export Button
    csv = target_list.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Download CSV",
        data=csv,
        file_name="uidai_stark_ai_priority_list.csv",
        mime="text/csv",
        type="primary"
    )
# TAB 3: PATTERN ANALYTICS (CHARTS)
with tab_charts:
    c1, c2 = st.columns(2)

    with c1:
        st.subheader("Ghost ID Pattern (Ratio Deviation)")
        # Scatter plot: daily activity volume vs. deviation from the district baseline
        fig_scatter = px.scatter(
            filtered_df,
            x="total_activity",
            y="ratio_deviation",
            color="risk_category",
            color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
            title="Deviation from District Baseline",
            labels={"ratio_deviation": "Deviation Score", "total_activity": "Daily Transactions"},
            hover_data=['pincode', 'district']
        )
        fig_scatter.add_hline(y=0.2, line_dash="dash", line_color="red", annotation_text="Fraud Threshold")
        st.plotly_chart(fig_scatter, use_container_width=True)

    with c2:
        st.subheader("Risk Distribution")
        # Histogram of risk scores across the filtered centers
        fig_hist = px.histogram(
            filtered_df,
            x="RISK_SCORE",
            nbins=20,
            color_discrete_sequence=['#3b82f6'],
            title="Frequency of Risk Scores"
        )
        fig_hist.update_layout(bargap=0.1)
        st.plotly_chart(fig_hist, use_container_width=True)
# 7. FOOTER
st.markdown("---")
st.markdown("""
<div style="text-align: center; font-size: 13px; color: #94a3b8;">
<b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026 | Team UIDAI_4571<br>
<i>Confidential - For Official Use Only</i>
</div>
""", unsafe_allow_html=True)