StartupMap-India / backend /database.py
Ram2005's picture
v2.1: Server-side clustering, floating stats cards, enhanced sidebar UX, EntityDetail + AnalyticsPanel, optimized spatial indexing
9492321 verified
"""
Bharat Tech Atlas โ€” Database layer using SQLite with R-Tree spatial indexing.
"""
import sqlite3
import os
import json
import math
DB_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "bharattechatlas.db")
def get_db():
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA foreign_keys=ON")
conn.execute("PRAGMA cache_size=-8000")
conn.execute("PRAGMA mmap_size=67108864")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA temp_store=MEMORY")
return conn
def init_db():
conn = get_db()
conn.executescript("""
CREATE TABLE IF NOT EXISTS entities (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
slug TEXT UNIQUE NOT NULL,
entity_type TEXT NOT NULL CHECK (entity_type IN (
'startup', 'sme', 'college_ecell', 'incubator',
'accelerator', 'coworking', 'investor'
)),
sectors TEXT NOT NULL DEFAULT '[]',
dpiit_category TEXT,
business_model TEXT CHECK (business_model IN (
'lifestyle', 'scalable', 'social', 'large_company', NULL
)),
stage TEXT CHECK (stage IN (
'ideation', 'validation', 'early_traction',
'scaling', 'mature', NULL
)),
dpiit_recognized INTEGER DEFAULT 0,
nsa_winner INTEGER DEFAULT 0,
nsa_category TEXT,
is_women_led INTEGER DEFAULT 0,
is_rural_impact INTEGER DEFAULT 0,
is_campus_startup INTEGER DEFAULT 0,
unicorn_status TEXT CHECK (unicorn_status IN (
'unicorn', 'soonicorn', NULL
)),
funding_inr REAL DEFAULT 0,
funding_stage TEXT,
last_funding_date TEXT,
funding_rounds TEXT DEFAULT '[]',
valuation_usd REAL,
description TEXT,
website TEXT,
logo_url TEXT,
linkedin_url TEXT,
instagram_url TEXT,
facebook_url TEXT,
twitter_url TEXT,
linkedin_team_size INTEGER,
linkedin_industry TEXT,
linkedin_specialties TEXT,
investors TEXT DEFAULT '[]',
ynos_profile_url TEXT,
address TEXT,
city TEXT NOT NULL,
district TEXT,
state TEXT NOT NULL,
pin_code TEXT,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
founded_year INTEGER,
employee_count INTEGER,
college_name TEXT,
data_sources TEXT DEFAULT '[]',
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now')),
is_active INTEGER DEFAULT 1
);
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type) WHERE is_active = 1;
CREATE INDEX IF NOT EXISTS idx_entities_city ON entities(city);
CREATE INDEX IF NOT EXISTS idx_entities_state ON entities(state);
CREATE INDEX IF NOT EXISTS idx_entities_slug ON entities(slug);
CREATE INDEX IF NOT EXISTS idx_entities_founded ON entities(founded_year);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name COLLATE NOCASE);
CREATE INDEX IF NOT EXISTS idx_entities_dpiit_cat ON entities(dpiit_category);
CREATE INDEX IF NOT EXISTS idx_entities_biz_model ON entities(business_model);
CREATE INDEX IF NOT EXISTS idx_entities_unicorn ON entities(unicorn_status);
CREATE INDEX IF NOT EXISTS idx_entities_funding ON entities(funding_inr DESC) WHERE is_active = 1;
CREATE INDEX IF NOT EXISTS idx_entities_type_state ON entities(entity_type, state) WHERE is_active = 1;
CREATE INDEX IF NOT EXISTS idx_entities_type_funding ON entities(entity_type, funding_inr DESC) WHERE is_active = 1;
CREATE INDEX IF NOT EXISTS idx_entities_active_type ON entities(is_active, entity_type);
CREATE INDEX IF NOT EXISTS idx_entities_active_latlon ON entities(is_active, latitude, longitude);
""")
conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS entities_rtree
USING rtree(id, min_lng, max_lng, min_lat, max_lat)
""")
conn.executescript("""
CREATE TABLE IF NOT EXISTS sectors (
slug TEXT PRIMARY KEY,
label TEXT NOT NULL,
parent_slug TEXT,
icon TEXT,
color TEXT,
category TEXT DEFAULT 'top_sector',
FOREIGN KEY (parent_slug) REFERENCES sectors(slug)
);
INSERT OR IGNORE INTO sectors (slug, label, icon, color, category) VALUES
('fintech', 'FinTech', '๐Ÿ’ณ', '#3B82F6', 'top_sector'),
('saas_ai', 'SaaS / AI', 'โ˜๏ธ', '#6366F1', 'top_sector'),
('ecommerce', 'E-Commerce', '๐Ÿ›’', '#F59E0B', 'top_sector'),
('healthcare', 'Healthcare', '๐Ÿฅ', '#10B981', 'top_sector'),
('manufacturing', 'Manufacturing', '๐Ÿญ', '#78716C', 'top_sector'),
('edtech', 'EdTech', '๐Ÿ“š', '#8B5CF6', 'dpiit_category'),
('agritech', 'AgriTech', '๐ŸŒพ', '#84CC16', 'dpiit_category'),
('cleantech', 'CleanTech', '๐ŸŒฟ', '#22C55E', 'dpiit_category'),
('deeptech', 'DeepTech', '๐Ÿ”ฌ', '#EC4899', 'dpiit_category'),
('logistics', 'Logistics', '๐Ÿš›', '#F97316', 'dpiit_category'),
('gaming', 'Gaming', '๐ŸŽฎ', '#EF4444', 'dpiit_category'),
('ai_ml', 'AI / ML', '๐Ÿค–', '#7C3AED', 'dpiit_category'),
('cybersecurity', 'Cybersecurity', '๐Ÿ”’', '#0EA5E9', 'dpiit_category'),
('foodtech', 'FoodTech', '๐Ÿ”', '#D97706', 'dpiit_category'),
('proptech', 'PropTech', '๐Ÿ ', '#14B8A6', 'dpiit_category'),
('legaltech', 'LegalTech', 'โš–๏ธ', '#64748B', 'dpiit_category'),
('mediatech', 'MediaTech', '๐Ÿ“บ', '#E11D48', 'dpiit_category'),
('mobility', 'Mobility', '๐Ÿš—', '#0891B2', 'dpiit_category'),
('social_impact', 'Social Impact', '๐ŸŒ', '#059669', 'dpiit_category'),
('biotech', 'BioTech', '๐Ÿงฌ', '#A855F7', 'dpiit_category'),
('spacetech', 'SpaceTech', '๐Ÿš€', '#1D4ED8', 'dpiit_category'),
('d2c', 'D2C / E-Commerce', '๐Ÿ›๏ธ', '#F59E0B', 'dpiit_category'),
('saas', 'SaaS', '๐Ÿ’ป', '#6366F1', 'dpiit_category'),
('healthtech', 'HealthTech', '๐Ÿ’Š', '#10B981', 'dpiit_category'),
('iot', 'IoT', '๐Ÿ“ก', '#0D9488', 'dpiit_category'),
('drone_tech', 'Drone Tech', '๐Ÿ›ธ', '#4F46E5', 'dpiit_category'),
('ev', 'EV / E-Mobility', '๐Ÿ”‹', '#16A34A', 'dpiit_category'),
('insurtech', 'InsurTech', '๐Ÿ›ก๏ธ', '#2563EB', 'dpiit_category'),
('wealthtech', 'WealthTech', '๐Ÿ“ˆ', '#7C3AED', 'dpiit_category');
""")
conn.commit()
conn.close()
def haversine_distance(lat1, lon1, lat2, lon2):
R = 6371
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat / 2) ** 2 +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon / 2) ** 2)
c = 2 * math.asin(math.sqrt(a))
return R * c