# V5 Deploy: Three-Phase Hybrid Scraper + 24/7 Live Operation (commit 49b3fff)
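"""Flask server for the Sentix sentiment dashboard.

Serves the landing page and dashboard templates, exposes a small JSON API
over a SQLite store of scored headlines, and (when run directly) starts the
live scraper's sync loop in a background daemon thread.
"""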
import os
import sqlite3
import json
import threading
from datetime import datetime

from flask import Flask, render_template, jsonify, request

from live_scraper import run_sync_loop

app = Flask(__name__)

DB_PATH = "sentiment_data.db"
STOCKS_FILE = "data/indian_stocks.json"

# Load stock names for display
def load_stock_names():
    try:
        with open(STOCKS_FILE, "r") as f:
            data = json.load(f)
        return {s["symbol"]: s["name"] for s in data["stocks"]}
    except Exception:
        return {}

STOCK_NAMES = load_stock_names()
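
# The loader above assumes data/indian_stocks.json is shaped roughly like
# this (illustrative sketch; only "symbol" and "name" are used here, and
# "sector" is read later by /api/company):
# {
#   "stocks": [
#     {"symbol": "RELIANCE", "name": "Reliance Industries", "sector": "Energy"},
#     ...
#   ]
# }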

def get_db_connection():
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn

@app.route("/api/debug")
def debug_sync():
import os, json, traceback
from services.database import create_tables
logs = []
try:
logs.append("Starting debug sync...")
create_tables()
logs.append("Tables initialized.")
if os.path.exists("sentiment_data.json"):
logs.append("Found sentiment_data.json")
conn = get_db_connection()
c = conn.cursor()
c.execute("DELETE FROM sentiment_averages")
c.execute("DELETE FROM sentiment_scores")
with open("sentiment_data.json", "r") as f:
data = json.load(f)
logs.append(f"Loaded {len(data)} averages from JSON.")
for r in data:
c.execute("INSERT INTO sentiment_averages (ticker, average_score, num_headlines, confidence, price_change, score_type, scraped_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
(r['ticker'], r['average_score'], r['num_headlines'], r.get('confidence', 'LOW'), r.get('price_change'), r.get('score_type', 'DIRECT'), r['scraped_at']))
if os.path.exists("sentiment_headlines.json"):
logs.append("Found sentiment_headlines.json")
with open("sentiment_headlines.json", "r") as f:
data2 = json.load(f)
logs.append(f"Loaded {len(data2)} headlines from JSON.")
for r in data2:
c.execute("INSERT INTO sentiment_scores (ticker, headline, score, source, validated, scraped_at) VALUES (?, ?, ?, ?, ?, ?)",
(r['ticker'], r['headline'], r['score'], r['source'], r.get('validated', 1), r['scraped_at']))
conn.commit()
c.execute("SELECT COUNT(*) FROM sentiment_scores")
total = c.fetchone()[0]
logs.append(f"Sync complete. DB sentiment_scores count is now: {total}")
conn.close()
else:
logs.append("sentiment_data.json DOES NOT EXIST in this container.")
except Exception as e:
logs.append(f"FATAL Exception: {str(e)}")
logs.append(traceback.format_exc())
return jsonify({"debug_logs": logs})
@app.route("/")
def landing():
return render_template("landing.html")
@app.route("/dashboard")
def index():
return render_template("index.html")
@app.route("/api/stats")
def api_stats():
"""Get high-level statistics for the top cards."""
conn = get_db_connection()
c = conn.cursor()
c.execute("SELECT COUNT(*) FROM sentiment_scores WHERE validated = 1")
total_headlines = c.fetchone()[0]
# Get number of unique stocks scored (excluding sector entries)
c.execute("SELECT COUNT(DISTINCT ticker) FROM sentiment_averages WHERE ticker NOT LIKE 'SECTOR_%'")
stocks_scored = c.fetchone()[0]
conn.close()
return jsonify({
"total_headlines": total_headlines,
"stocks_scored": stocks_scored,
"ai_accuracy": "86.39%"
})
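
# Illustrative response (counts are placeholders; "ai_accuracy" is the
# hard-coded figure above):
# {"total_headlines": 1234, "stocks_scored": 87, "ai_accuracy": "86.39%"}
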
@app.route("/api/overview")
def api_overview():
"""Get the latest sentiment score for all companies (for the bar chart)."""
conn = get_db_connection()
c = conn.cursor()
# Get the latest average score for each ticker
query = """
SELECT ticker, average_score as score, confidence, price_change, score_type
FROM sentiment_averages
WHERE id IN (
SELECT MAX(id)
FROM sentiment_averages
GROUP BY ticker
)
ORDER BY average_score DESC
"""
c.execute(query)
results = [dict(row) for row in c.fetchall()]
conn.close()
# Filter out SECTOR_ entries from display
results = [r for r in results if not r['ticker'].startswith('SECTOR_')]
# Prioritize DIRECT and HYBRID scores over SECTOR-only
direct_hybrid = [r for r in results if r.get('score_type') in ('DIRECT', 'HYBRID')]
sector_only = [r for r in results if r.get('score_type') == 'SECTOR']
# Bullish: DIRECT/HYBRID first, then fill with SECTOR if needed
dh_bullish = [r for r in direct_hybrid if r['score'] > 0.1]
s_bullish = [r for r in sector_only if r['score'] > 0.1]
bullish = (dh_bullish + s_bullish)[:15]
# Bearish: same priority
dh_bearish = sorted([r for r in direct_hybrid if r['score'] < -0.1], key=lambda x: x['score'])
s_bearish = sorted([r for r in sector_only if r['score'] < -0.1], key=lambda x: x['score'])
bearish = (dh_bearish + s_bearish)[:15]
# Add company names
for items in [bullish, bearish]:
for item in items:
item["name"] = STOCK_NAMES.get(item["ticker"], item["ticker"])
return jsonify({
"bullish": bullish,
"bearish": bearish
})
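
# Illustrative response shape (tickers, names, and numbers are placeholders):
# {
#   "bullish": [{"ticker": "TCS", "score": 0.42, "confidence": "HIGH",
#                "price_change": 1.2, "score_type": "DIRECT",
#                "name": "Tata Consultancy Services"}, ...],
#   "bearish": [ ...same shape, most negative scores first... ]
# }
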
@app.route("/api/headlines")
def api_headlines():
"""Get the 50 most recently scored headlines (for the live feed)."""
conn = get_db_connection()
c = conn.cursor()
c.execute('''
SELECT ticker, headline, score, source, scraped_at
FROM sentiment_scores
ORDER BY id DESC LIMIT 50
''')
results = [dict(row) for row in c.fetchall()]
conn.close()
# Format time and add full name
for r in results:
try:
dt = datetime.fromisoformat(r['scraped_at'].replace('Z', '+00:00'))
r['time_ago'] = dt.strftime("%H:%M")
except:
r['time_ago'] = ""
r['name'] = STOCK_NAMES.get(r['ticker'].replace('SECTOR_', ''), r['ticker'])
return jsonify(results)
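
# Each feed entry looks roughly like this (placeholder values):
# {"ticker": "INFY", "headline": "...", "score": -0.3, "source": "...",
#  "scraped_at": "2026-02-18T09:30:00Z", "time_ago": "09:30", "name": "Infosys"}
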
@app.route("/api/search")
def api_search():
"""Search for a specific company's sentiment data."""
from flask import request
query = request.args.get('q', '').upper()
if not query:
return jsonify({"error": "No query provided"}), 400
conn = get_db_connection()
c = conn.cursor()
# Find matching tickers from STOCK_NAMES
matches = []
for ticker, name in STOCK_NAMES.items():
if query in ticker or query in name.upper():
matches.append(ticker)
if not matches:
conn.close()
return jsonify({"results": []})
# Get latest score for top 10 matches
results = []
for match_ticker in matches[:10]:
c.execute('''
SELECT average_score as score, confidence, price_change, score_type, scraped_at
FROM sentiment_averages
WHERE ticker = ?
ORDER BY id DESC LIMIT 1
''', (match_ticker,))
score_row = c.fetchone()
if score_row:
result = dict(score_row)
result['ticker'] = match_ticker
result['name'] = STOCK_NAMES[match_ticker]
results.append(result)
conn.close()
return jsonify({"results": results})
@app.route("/api/company/<ticker>")
def api_company(ticker):
"""Get detailed data for a single company (trend + headlines)."""
conn = get_db_connection()
c = conn.cursor()
# 1. Get recent trend (last 10 averages) — direct ticker only
c.execute('''
SELECT average_score as score, confidence, price_change, score_type, scraped_at
FROM sentiment_averages
WHERE ticker = ?
ORDER BY id DESC LIMIT 10
''', (ticker,))
trend_rows = [dict(row) for row in c.fetchall()]
trend = []
for r in trend_rows[::-1]:
try:
dt = datetime.fromisoformat(r['scraped_at'].replace('Z', '+00:00'))
r['time_label'] = dt.strftime("%H:%M")
except:
r['time_label'] = ""
trend.append(r)
# 2. Get headlines for this company (direct only, entity-validated)
try:
with open(STOCKS_FILE, "r") as f:
full_data = json.load(f)
stock_obj = next((s for s in full_data["stocks"] if s["symbol"] == ticker), None)
sector = stock_obj.get("sector", "General") if stock_obj else "General"
except:
sector = "General"
c.execute('''
SELECT headline, score, source, scraped_at
FROM sentiment_scores
WHERE ticker = ?
ORDER BY id DESC LIMIT 15
''', (ticker,))
headlines_rows = [dict(row) for row in c.fetchall()]
headlines = []
for r in headlines_rows:
try:
dt = datetime.fromisoformat(r['scraped_at'].replace('Z', '+00:00'))
r['time_ago'] = dt.strftime("%b %d, %H:%M")
except:
r['time_ago'] = ""
headlines.append(r)
conn.close()
current_score = trend[-1]['score'] if trend else 0.0
latest_confidence = trend[-1].get('confidence', 'LOW') if trend else 'LOW'
latest_price = trend[-1].get('price_change') if trend else None
return jsonify({
"ticker": ticker,
"name": STOCK_NAMES.get(ticker, ticker),
"sector": sector,
"current_score": current_score,
"confidence": latest_confidence,
"price_change": latest_price,
"trend": trend,
"headlines": headlines
})
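
# Illustrative response (placeholder numbers): "trend" runs oldest-to-newest
# and "current_score" mirrors its latest point (0.0 when no rows exist).
# {"ticker": "...", "name": "...", "sector": "...", "current_score": 0.25,
#  "confidence": "HIGH", "price_change": 0.8, "trend": [...], "headlines": [...]}
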
if __name__ == "__main__":
    from services.database import create_tables

    # Initialize SQLite schema if new deployment
    create_tables()

    # Start the background scraper immediately on launch
    def start_sync_worker():
        """Starts the live_scraper logic in a background thread."""
        print("🛰️ Initializing Sentix Background Sync Engine...")
        worker = threading.Thread(target=run_sync_loop, daemon=True)
        worker.start()

    start_sync_worker()

    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", debug=False, port=port)
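
# Minimal local launch (a sketch; assumes Flask, live_scraper, and the
# templates/ directory are present):
#   PORT=7860 python app.py    # defaults to port 7860 if PORT is unset
# The sync worker runs as a daemon thread, so it exits with the web process.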