Upload folder using huggingface_hub
- README.md +8 -0
- frontend/app/components/intelligence/IntelligenceFeed.tsx +45 -15
- src/config/intel_config.json +64 -1
- src/nodes/combinedAgentNode.py +48 -0
- src/utils/profile_scrapers.py +70 -25
- src/utils/trending_detector.py +325 -0
- test_all_tools.py +276 -0
- test_ml_pipelines.py +231 -0
- test_news_tools.py +117 -0
- test_social_tools.py +185 -0
- test_weather_tools.py +57 -0
- tool_test_results.json +73 -0
README.md
CHANGED
@@ -64,6 +64,12 @@ A multi-agent AI system that aggregates intelligence from 47+ data sources to pr
 - Domain filtering (political, economic, weather, social)
 - Floating chat UI in dashboard
 
+✅ **Trending/Velocity Detection** 🆕:
+- SQLite-based topic frequency tracking (24-hour rolling window)
+- Momentum calculation: `current_hour / avg_last_6_hours`
+- Spike alerts when topic volume > 3x baseline
+- Integrated into Combined Agent dashboard
+
 ✅ **Real-Time Dashboard** with:
 - Live Intelligence Feed
 - Floating AI Chatbox

@@ -71,6 +77,8 @@ A multi-agent AI system that aggregates intelligence from 47+ data sources to pr
 - **Live Satellite/Weather Map** (Windy.com) 🆕
 - **National Flood Threat Score** 🆕
 - **30-Year Historical Climate Analysis** 🆕
+- **Trending Topics & Spike Alerts** 🆕
+- **Enhanced Operational Indicators** 🆕 (infrastructure_health, regulatory_activity, investment_climate)
 - Operational Risk Radar
 - ML Anomaly Detection Display
 - Market Predictions with Moving Averages
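The momentum rule in the README reduces to a one-line ratio. A minimal sketch with hypothetical counts (the actual implementation is in `src/utils/trending_detector.py`, added below):

```python
# Hypothetical counts illustrating the momentum/spike rule from the README.
hourly_counts = [2, 3, 1, 2, 2, 2]  # mentions in each of the last 6 hours
current_hour = 12                   # mentions so far in the current hour

baseline = sum(hourly_counts) / len(hourly_counts)  # 2.0
momentum = current_hour / baseline                  # 6.0

is_trending = momentum >= 2.0  # trending_momentum_min in intel_config.json
is_spike = momentum >= 3.0     # spike_multiplier: >3x baseline fires an alert
```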
frontend/app/components/intelligence/IntelligenceFeed.tsx
CHANGED
@@ -1,10 +1,12 @@
 import { Card } from "../ui/card";
 import { Badge } from "../ui/badge";
 import { Tabs, TabsContent, TabsList, TabsTrigger } from "../ui/tabs";
-import {
+import { Button } from "../ui/button";
+import { Newspaper, Cloud, TrendingUp, FileText, Radio, Globe, MapPin, Settings } from "lucide-react";
 import { useRogerData, RogerEvent } from "../../hooks/use-roger-data";
-import { motion } from "framer-motion";
+import { motion, AnimatePresence } from "framer-motion";
 import { useState } from "react";
+import IntelligenceSettings from "./IntelligenceSettings";
 
 const IntelligenceFeed = () => {
   const { events, isConnected } = useRogerData();

@@ -12,6 +14,9 @@ const IntelligenceFeed = () => {
   // Region toggle state (Sri Lanka / World)
   const [region, setRegion] = useState<"sri_lanka" | "world">("sri_lanka");
 
+  // Settings panel toggle
+  const [showSettings, setShowSettings] = useState(false);
+
   // ALWAYS ensure events is an array
   const safeEvents: RogerEvent[] = Array.isArray(events) ? events : [];

@@ -118,21 +123,46 @@ const IntelligenceFeed = () => {
         <Radio className="w-5 h-5 text-primary" />
         <h2 className="text-lg font-bold">INTELLIGENCE FEED</h2>
 
+        <div className="ml-auto flex items-center gap-3">
+          <Button
+            variant={showSettings ? "default" : "outline"}
+            size="sm"
+            onClick={() => setShowSettings(!showSettings)}
+            className="flex items-center gap-1"
+          >
+            <Settings className="w-4 h-4" />
+            <span className="hidden sm:inline">{showSettings ? "Hide" : "Settings"}</span>
+          </Button>
+          <span className="text-xs font-mono text-muted-foreground">
+            {isConnected ? (
+              <span className="flex items-center gap-2">
+                <span className="w-2 h-2 rounded-full bg-success animate-pulse"></span>
+                Live
+              </span>
+            ) : (
+              <span className="flex items-center gap-2">
+                <span className="w-2 h-2 rounded-full bg-warning"></span>
+                Reconnecting...
+              </span>
+            )}
+          </span>
+        </div>
       </div>
 
+      {/* SETTINGS PANEL */}
+      <AnimatePresence>
+        {showSettings && (
+          <motion.div
+            initial={{ opacity: 0, height: 0 }}
+            animate={{ opacity: 1, height: "auto" }}
+            exit={{ opacity: 0, height: 0 }}
+            className="mb-4 overflow-hidden"
+          >
+            <IntelligenceSettings />
+          </motion.div>
+        )}
+      </AnimatePresence>
+
       {/* REGION TOGGLE - Sri Lanka / World */}
       <div className="flex gap-2 mb-4 overflow-x-auto hide-scrollbar">
         <button
src/config/intel_config.json
CHANGED
@@ -5,5 +5,68 @@
     "linkedin": []
   },
   "user_keywords": [],
-  "user_products": []
+  "user_products": [],
+  "operational_keywords": {
+    "infrastructure": [
+      "Colombo port",
+      "Hambantota port",
+      "port strike",
+      "power outage",
+      "water shortage",
+      "fuel shortage",
+      "airport delay",
+      "customs clearance",
+      "road closure",
+      "railway disruption"
+    ],
+    "government": [
+      "cabinet decision",
+      "new policy",
+      "regulation change",
+      "tax amendment",
+      "import restriction",
+      "export ban",
+      "license requirement",
+      "central bank",
+      "budget announcement"
+    ],
+    "opportunity": [
+      "investment",
+      "expansion",
+      "new factory",
+      "job creation",
+      "export growth",
+      "tourism boost",
+      "infrastructure project",
+      "development grant",
+      "FDI",
+      "trade agreement"
+    ]
+  },
+  "alert_thresholds": {
+    "trending_momentum_min": 2.0,
+    "spike_multiplier": 3.0,
+    "high_risk_score": 0.7,
+    "high_opportunity_score": 0.6
+  },
+  "default_competitors": {
+    "telecom": {
+      "twitter": [
+        "DialogSriLanka",
+        "Mobaborang",
+        "HutchSL"
+      ],
+      "facebook": [
+        "Dialog",
+        "SLT-Mobitel",
+        "Hutch"
+      ]
+    }
+  },
+  "notes": {
+    "removed_profiles": [
+      "SLTMobitel - Twitter profile not found/restricted"
+    ],
+    "last_verified": "2025-12-08"
+  }
 }
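A minimal sketch of how these thresholds could be consumed (the loading code is illustrative; the project's actual config loader may differ):

```python
import json

# Illustrative only: load the config and map momentum onto the configured levels.
with open("src/config/intel_config.json") as f:
    cfg = json.load(f)

thresholds = cfg["alert_thresholds"]

def classify(momentum: float) -> str:
    """Classify a topic's momentum against the alert thresholds."""
    if momentum >= thresholds["spike_multiplier"]:       # 3.0 -> spike alert
        return "spike"
    if momentum >= thresholds["trending_momentum_min"]:  # 2.0 -> trending
        return "trending"
    return "normal"

print(classify(2.4))  # "trending"
```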
src/nodes/combinedAgentNode.py
CHANGED
@@ -14,6 +14,13 @@ from typing import Dict, Any, List
 # Import storage manager for production-grade persistence
 from src.storage.storage_manager import StorageManager
 
+# Import trending detector for velocity metrics
+try:
+    from src.utils.trending_detector import get_trending_detector, record_topic_mention
+    TRENDING_ENABLED = True
+except ImportError:
+    TRENDING_ENABLED = False
+
 logger = logging.getLogger("combined_node")
 logger.setLevel(logging.INFO)
 if not logger.handlers:

@@ -473,6 +480,11 @@ JSON only:"""
         "avg_confidence": 0.0,
         "high_priority_count": 0,
         "total_events": 0,
+        "trending_topics": [],
+        "spike_alerts": [],
+        "infrastructure_health": 1.0,
+        "regulatory_activity": 0.0,
+        "investment_climate": 0.5,
         "last_updated": datetime.utcnow().isoformat()
     }

@@ -527,6 +539,42 @@ JSON only:"""
         snapshot["avg_confidence"] = round(avg_confidence, 3)
         snapshot["high_priority_count"] = high_priority_count
         snapshot["total_events"] = len(feed)
+
+        # NEW: Enhanced Operational Indicators
+        # Infrastructure Health (inverted logistics friction)
+        snapshot["infrastructure_health"] = round(max(0, 1.0 - snapshot["logistics_friction"]), 3)
+
+        # Regulatory Activity (count of political events, scaled by 0.1)
+        snapshot["regulatory_activity"] = round(len(political_scores) * 0.1, 3)
+
+        # Investment Climate (opportunity-weighted)
+        if opportunity_scores:
+            snapshot["investment_climate"] = round(0.5 + safe_avg(opportunity_scores) * 0.5, 3)
+
+        # NEW: Record topics for trending analysis and get current trends
+        if TRENDING_ENABLED:
+            try:
+                detector = get_trending_detector()
+
+                # Record topics from feed
+                for item in feed:
+                    summary = item.get("summary", "")
+                    domain = item.get("target_agent", "unknown")
+
+                    # Extract key topic words (simplified - just use the first 5 words)
+                    words = summary.split()[:5]
+                    if words:
+                        topic = " ".join(words).lower()
+                        record_topic_mention(topic, source="roger_feed", domain=domain)
+
+                # Get trending topics and spike alerts
+                snapshot["trending_topics"] = detector.get_trending_topics(limit=5)
+                snapshot["spike_alerts"] = detector.get_spike_alerts(limit=3)
+
+                logger.info(f"[DataRefresherAgent] Trending: {len(snapshot['trending_topics'])} topics, {len(snapshot['spike_alerts'])} spikes")
+            except Exception as e:
+                logger.warning(f"[DataRefresherAgent] Trending detection failed: {e}")
+
         snapshot["last_updated"] = datetime.utcnow().isoformat()
 
         logger.info(f"[DataRefresherAgent] Dashboard Metrics:")
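A quick worked example of the three indicator formulas above (input numbers are hypothetical, and plain averaging stands in for the project's `safe_avg` helper):

```python
# Hypothetical inputs for the operational-indicator formulas above.
logistics_friction = 0.25           # already present in the snapshot
political_scores = [0.6, 0.8, 0.4]  # three political events this cycle
opportunity_scores = [0.4, 0.8]     # two opportunity-tagged events

infrastructure_health = round(max(0, 1.0 - logistics_friction), 3)   # 0.75
regulatory_activity = round(len(political_scores) * 0.1, 3)          # 0.3
avg_opportunity = sum(opportunity_scores) / len(opportunity_scores)  # 0.6
investment_climate = round(0.5 + avg_opportunity * 0.5, 3)           # 0.8
```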
src/utils/profile_scrapers.py
CHANGED
@@ -45,6 +45,11 @@ def scrape_twitter_profile(username: str, max_items: int = 20):
     Fetches tweets from a specific user's profile, not search results.
     Perfect for monitoring competitor accounts, influencers, or specific business profiles.
 
+    Features:
+    - Retry logic with exponential backoff (3 attempts)
+    - Fallback to keyword search if profile fails
+    - Increased timeout (90s)
+
     Args:
         username: Twitter username (without @)
         max_items: Maximum number of tweets to fetch

@@ -106,39 +111,79 @@ def scrape_twitter_profile(username: str, max_items: int = 20):
 
             page = context.new_page()
 
-            # Navigate to user profile
+            # Navigate to user profile with retry logic
             profile_url = f"https://x.com/{username}"
             logger.info(f"[TWITTER_PROFILE] Monitoring @{username}")
 
+            max_retries = 3
+            navigation_success = False
+            last_error = None
+
+            for attempt in range(max_retries):
+                try:
+                    # Exponential backoff: 0, 2, 4 seconds
+                    if attempt > 0:
+                        wait_time = 2 ** attempt
+                        logger.info(f"[TWITTER_PROFILE] Retry {attempt + 1}/{max_retries} after {wait_time}s...")
+                        time.sleep(wait_time)
+
+                    # Increased timeout from 60s to 90s, changed to networkidle
+                    page.goto(profile_url, timeout=90000, wait_until="networkidle")
+                    time.sleep(5)
+
+                    # Handle popups
+                    popup_selectors = [
+                        "[data-testid='app-bar-close']",
+                        "[aria-label='Close']",
+                        "button:has-text('Not now')",
+                    ]
+                    for selector in popup_selectors:
+                        try:
+                            if page.locator(selector).count() > 0 and page.locator(selector).first.is_visible():
+                                page.locator(selector).first.click()
+                                time.sleep(1)
+                        except:
+                            pass
+
+                    # Wait for tweets to load
                     try:
+                        page.wait_for_selector("article[data-testid='tweet']", timeout=20000)
+                        logger.info(f"[TWITTER_PROFILE] Loaded {username}'s profile")
+                        navigation_success = True
+                        break
                     except:
+                        last_error = f"Could not load tweets for @{username}"
+                        logger.warning(f"[TWITTER_PROFILE] {last_error}, attempt {attempt + 1}/{max_retries}")
+                        continue
+
+                except Exception as e:
+                    last_error = str(e)
+                    logger.warning(f"[TWITTER_PROFILE] Navigation failed on attempt {attempt + 1}: {e}")
+                    continue
+
+            # If profile scraping failed after all retries, try fallback to keyword search
+            if not navigation_success:
+                logger.warning(f"[TWITTER_PROFILE] Profile scraping failed, falling back to keyword search for '{username}'")
+                browser.close()
 
+                # Fallback: use keyword search instead
                 try:
+                    from src.utils.utils import scrape_twitter
+                    fallback_result = scrape_twitter.invoke({"query": username, "max_items": max_items})
+                    fallback_data = json.loads(fallback_result) if isinstance(fallback_result, str) else fallback_result
+
+                    if "error" not in fallback_data:
+                        fallback_data["fallback_used"] = True
+                        fallback_data["original_error"] = last_error
+                        fallback_data["note"] = f"Used keyword search as fallback for @{username}"
+                        return json.dumps(fallback_data, default=str)
+                except Exception as fallback_error:
+                    logger.error(f"[TWITTER_PROFILE] Fallback also failed: {fallback_error}")
 
+                return json.dumps({
+                    "error": last_error or f"Profile not found or private: @{username}",
+                    "fallback_attempted": True
+                }, default=str)
 
             # Check if logged in
             if "login" in page.url:
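The retry-and-backoff pattern used above, extracted into a generic sketch (not tied to Playwright; the function name is illustrative):

```python
import time

def with_retries(action, max_retries: int = 3):
    """Run a zero-argument callable, retrying with exponential backoff.

    Mirrors the loop above: no wait before the first attempt, then 2s and 4s.
    """
    last_error = None
    for attempt in range(max_retries):
        if attempt > 0:
            time.sleep(2 ** attempt)  # 2s after the first failure, 4s after the second
        try:
            return action()
        except Exception as e:
            last_error = e
    raise RuntimeError(f"All {max_retries} attempts failed") from last_error
```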
src/utils/trending_detector.py
ADDED
@@ -0,0 +1,325 @@
"""
src/utils/trending_detector.py
Trending/Velocity Detection Module for Roger

Tracks topic mention frequency over time to detect:
- Topics gaining traction (momentum)
- Sudden volume spikes (alerts)
- Trending topics across the system

Uses SQLite for persistence.
"""
import os
import json
import sqlite3
import hashlib
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional, Tuple
from pathlib import Path

logger = logging.getLogger("Roger.trending")

# Default database path
DEFAULT_DB_PATH = os.path.join(
    os.path.dirname(__file__), "..", "..", "data", "trending.db"
)


class TrendingDetector:
    """
    Detects trending topics and velocity spikes.

    Features:
    - Records topic mentions with timestamps
    - Calculates momentum (current_hour / avg_last_6_hours)
    - Detects spikes (>3x normal volume in 1 hour)
    - Returns trending topics for dashboard display
    """

    def __init__(self, db_path: str = None, spike_threshold: float = 3.0, momentum_threshold: float = 2.0):
        """
        Initialize the TrendingDetector.

        Args:
            db_path: Path to SQLite database (default: data/trending.db)
            spike_threshold: Multiplier for spike detection (default: 3x)
            momentum_threshold: Minimum momentum to be considered trending (default: 2.0)
        """
        self.db_path = db_path or DEFAULT_DB_PATH
        self.spike_threshold = spike_threshold
        self.momentum_threshold = momentum_threshold

        # Ensure directory exists
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)

        # Initialize database
        self._init_db()
        logger.info(f"[TrendingDetector] Initialized with db: {self.db_path}")

    def _init_db(self):
        """Create tables if they don't exist"""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS topic_mentions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    topic TEXT NOT NULL,
                    topic_hash TEXT NOT NULL,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    source TEXT,
                    domain TEXT
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_topic_hash ON topic_mentions(topic_hash)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_timestamp ON topic_mentions(timestamp)
            """)

            # Hourly aggregates for faster queries
            conn.execute("""
                CREATE TABLE IF NOT EXISTS hourly_counts (
                    topic_hash TEXT NOT NULL,
                    hour_bucket TEXT NOT NULL,
                    count INTEGER DEFAULT 1,
                    topic TEXT,
                    PRIMARY KEY (topic_hash, hour_bucket)
                )
            """)
            conn.commit()

    def _topic_hash(self, topic: str) -> str:
        """Generate a hash for a topic (normalized lowercase)"""
        normalized = topic.lower().strip()
        return hashlib.md5(normalized.encode()).hexdigest()[:12]

    def _get_hour_bucket(self, dt: datetime = None) -> str:
        """Get the hour bucket string (YYYY-MM-DD-HH)"""
        dt = dt or datetime.utcnow()
        return dt.strftime("%Y-%m-%d-%H")

    def record_mention(
        self,
        topic: str,
        source: str = None,
        domain: str = None,
        timestamp: datetime = None
    ):
        """
        Record a topic mention.

        Args:
            topic: The topic/keyword mentioned
            source: Source of the mention (e.g., 'twitter', 'news')
            domain: Domain (e.g., 'political', 'economical')
            timestamp: When the mention occurred (default: now)
        """
        topic_hash = self._topic_hash(topic)
        ts = timestamp or datetime.utcnow()
        hour_bucket = self._get_hour_bucket(ts)

        with sqlite3.connect(self.db_path) as conn:
            # Insert mention
            conn.execute("""
                INSERT INTO topic_mentions (topic, topic_hash, timestamp, source, domain)
                VALUES (?, ?, ?, ?, ?)
            """, (topic.lower().strip(), topic_hash, ts.isoformat(), source, domain))

            # Update hourly aggregate
            conn.execute("""
                INSERT INTO hourly_counts (topic_hash, hour_bucket, count, topic)
                VALUES (?, ?, 1, ?)
                ON CONFLICT(topic_hash, hour_bucket) DO UPDATE SET count = count + 1
            """, (topic_hash, hour_bucket, topic.lower().strip()))

            conn.commit()

    def record_mentions_batch(self, mentions: List[Dict[str, Any]]):
        """
        Record multiple mentions at once.

        Args:
            mentions: List of dicts with keys: topic, source, domain, timestamp
        """
        for mention in mentions:
            self.record_mention(
                topic=mention.get("topic", ""),
                source=mention.get("source"),
                domain=mention.get("domain"),
                timestamp=mention.get("timestamp")
            )

    def get_momentum(self, topic: str) -> float:
        """
        Calculate momentum for a topic.

        Momentum = mentions_in_current_hour / avg_mentions_in_last_6_hours

        Returns:
            Momentum value (1.0 = normal, >2.0 = trending, >3.0 = spike)
        """
        topic_hash = self._topic_hash(topic)
        now = datetime.utcnow()
        current_hour = self._get_hour_bucket(now)

        with sqlite3.connect(self.db_path) as conn:
            # Get current hour count
            result = conn.execute("""
                SELECT count FROM hourly_counts
                WHERE topic_hash = ? AND hour_bucket = ?
            """, (topic_hash, current_hour)).fetchone()
            current_count = result[0] if result else 0

            # Get average of last 6 hours
            past_hours = []
            for i in range(1, 7):
                past_dt = now - timedelta(hours=i)
                past_hours.append(self._get_hour_bucket(past_dt))

            placeholders = ",".join(["?" for _ in past_hours])
            result = conn.execute(f"""
                SELECT AVG(count) FROM hourly_counts
                WHERE topic_hash = ? AND hour_bucket IN ({placeholders})
            """, [topic_hash] + past_hours).fetchone()
            avg_count = result[0] if result and result[0] else 0.1  # Avoid division by zero

        return current_count / avg_count if avg_count > 0 else current_count

    def is_spike(self, topic: str, window_hours: int = 1) -> bool:
        """
        Check if a topic is experiencing a spike.

        A spike is when current volume > spike_threshold * normal volume.
        """
        momentum = self.get_momentum(topic)
        return momentum >= self.spike_threshold

    def get_trending_topics(self, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Get topics with momentum above threshold.

        Returns:
            List of trending topics with their momentum values
        """
        now = datetime.utcnow()
        current_hour = self._get_hour_bucket(now)

        trending = []

        with sqlite3.connect(self.db_path) as conn:
            # Get all topics mentioned in current hour
            results = conn.execute("""
                SELECT DISTINCT topic, topic_hash, count
                FROM hourly_counts
                WHERE hour_bucket = ?
                ORDER BY count DESC
                LIMIT 50
            """, (current_hour,)).fetchall()

            for topic, topic_hash, count in results:
                momentum = self.get_momentum(topic)

                if momentum >= self.momentum_threshold:
                    trending.append({
                        "topic": topic,
                        "momentum": round(momentum, 2),
                        "mentions_this_hour": count,
                        "is_spike": momentum >= self.spike_threshold,
                        "severity": "high" if momentum >= 5 else "medium" if momentum >= 3 else "low"
                    })

        # Sort by momentum descending
        trending.sort(key=lambda x: x["momentum"], reverse=True)
        return trending[:limit]

    def get_spike_alerts(self, limit: int = 5) -> List[Dict[str, Any]]:
        """
        Get topics with spike alerts (>3x normal volume).

        Returns:
            List of spike alerts
        """
        return [t for t in self.get_trending_topics(limit=50) if t["is_spike"]][:limit]

    def get_topic_history(self, topic: str, hours: int = 24) -> List[Dict[str, Any]]:
        """
        Get hourly mention counts for a topic.

        Args:
            topic: Topic to get history for
            hours: Number of hours to look back

        Returns:
            List of hourly counts
        """
        topic_hash = self._topic_hash(topic)
        now = datetime.utcnow()

        history = []
        with sqlite3.connect(self.db_path) as conn:
            for i in range(hours):
                hour_dt = now - timedelta(hours=i)
                hour_bucket = self._get_hour_bucket(hour_dt)

                result = conn.execute("""
                    SELECT count FROM hourly_counts
                    WHERE topic_hash = ? AND hour_bucket = ?
                """, (topic_hash, hour_bucket)).fetchone()

                history.append({
                    "hour": hour_bucket,
                    "count": result[0] if result else 0
                })

        return list(reversed(history))  # Oldest first

    def cleanup_old_data(self, days: int = 7):
        """
        Remove data older than specified days.

        Args:
            days: Number of days to keep
        """
        cutoff = datetime.utcnow() - timedelta(days=days)
        cutoff_str = cutoff.isoformat()
        cutoff_bucket = self._get_hour_bucket(cutoff)

        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                DELETE FROM topic_mentions WHERE timestamp < ?
            """, (cutoff_str,))
            conn.execute("""
                DELETE FROM hourly_counts WHERE hour_bucket < ?
            """, (cutoff_bucket,))
            conn.commit()

        logger.info(f"[TrendingDetector] Cleaned up data older than {days} days")


# Singleton instance for easy access
_trending_detector = None


def get_trending_detector() -> TrendingDetector:
    """Get the global TrendingDetector instance"""
    global _trending_detector
    if _trending_detector is None:
        _trending_detector = TrendingDetector()
    return _trending_detector


# Convenience functions
def record_topic_mention(topic: str, source: str = None, domain: str = None):
    """Record a single topic mention"""
    get_trending_detector().record_mention(topic, source, domain)


def get_trending_now(limit: int = 10) -> List[Dict[str, Any]]:
    """Get current trending topics"""
    return get_trending_detector().get_trending_topics(limit)


def get_spikes() -> List[Dict[str, Any]]:
    """Get current spike alerts"""
    return get_trending_detector().get_spike_alerts()
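A short usage sketch for the module above (the topic string and database path are made up for the demo):

```python
from src.utils.trending_detector import TrendingDetector

# Point at a throwaway database so the demo doesn't touch data/trending.db.
detector = TrendingDetector(db_path="/tmp/trending_demo.db")

# Hypothetical burst of mentions; in production these come from the agent feed.
for _ in range(12):
    detector.record_mention("fuel shortage", source="news", domain="economical")

print(detector.get_momentum("fuel shortage"))  # very large: no 6-hour baseline yet
print(detector.get_trending_topics(limit=5))   # [{'topic': 'fuel shortage', ...}]
print(detector.get_spike_alerts())             # topic appears once momentum >= 3.0
```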
test_all_tools.py
ADDED
@@ -0,0 +1,276 @@
"""
test_all_tools.py
Comprehensive test script for all Roger agentic AI tools
Runs each tool and validates output format
"""
import json
import sys
import os

# Force UTF-8 output on Windows
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from datetime import datetime

def test_tool(name, func, *args, **kwargs):
    """Test a tool and report results"""
    print(f"\n{'='*60}")
    print(f"[TEST] {name}")
    print(f"{'='*60}")
    try:
        result = func(*args, **kwargs)

        # Parse result if it's a JSON string
        if isinstance(result, str):
            try:
                parsed = json.loads(result)
                if "error" in parsed:
                    print(f"[WARN] TOOL RETURNED ERROR: {parsed['error']}")
                    if "solution" in parsed:
                        print(f"   [TIP] Solution: {parsed['solution']}")
                    return {"status": "error", "error": parsed["error"]}
                else:
                    print(f"[OK] SUCCESS")
                    # Print sample of results
                    if "results" in parsed:
                        print(f"   [DATA] Results count: {len(parsed['results'])}")
                        if parsed['results'] and len(parsed['results']) > 0:
                            print(f"   [SAMPLE] {str(parsed['results'][0])[:200]}...")
                    elif isinstance(parsed, dict):
                        for key in list(parsed.keys())[:3]:
                            val = str(parsed[key])[:100]
                            print(f"   - {key}: {val}...")
                    return {"status": "success", "data": parsed}
            except json.JSONDecodeError:
                print(f"[OK] SUCCESS (non-JSON response)")
                print(f"   [SAMPLE] {result[:200]}...")
                return {"status": "success", "data": result}
        else:
            print(f"[OK] SUCCESS")
            print(f"   [TYPE] Response type: {type(result)}")
            return {"status": "success", "data": result}

    except Exception as e:
        print(f"[FAIL] FAILED: {e}")
        return {"status": "failed", "error": str(e)}

def main():
    results = {}

    print("\n" + "="*70)
    print("[START] ROGER AGENTIC AI - COMPREHENSIVE TOOL TESTING")
    print(f"   Started: {datetime.now().isoformat()}")
    print("="*70)

    # =====================================================
    # 1. WEATHER & FLOOD TOOLS (No session required)
    # =====================================================
    print("\n\n[CATEGORY] WEATHER & FLOOD TOOLS")
    print("-"*50)

    try:
        from src.utils.utils import tool_dmc_alerts
        results["tool_dmc_alerts"] = test_tool("tool_dmc_alerts", tool_dmc_alerts)
    except ImportError as e:
        print(f"[FAIL] Import error: {e}")
        results["tool_dmc_alerts"] = {"status": "import_error", "error": str(e)}

    try:
        from src.utils.utils import tool_weather_nowcast
        results["tool_weather_nowcast"] = test_tool("tool_weather_nowcast", tool_weather_nowcast)
    except ImportError as e:
        print(f"[FAIL] Import error: {e}")
        results["tool_weather_nowcast"] = {"status": "import_error", "error": str(e)}

    try:
        from src.utils.utils import tool_rivernet_status
        results["tool_rivernet_status"] = test_tool("tool_rivernet_status", tool_rivernet_status)
    except ImportError as e:
        print(f"[FAIL] Import error: {e}")
        results["tool_rivernet_status"] = {"status": "import_error", "error": str(e)}

    try:
        from src.utils.utils import tool_district_weather
        results["tool_district_weather"] = test_tool("tool_district_weather", tool_district_weather, "colombo")
    except ImportError as e:
        print(f"[FAIL] Import error: {e}")
        results["tool_district_weather"] = {"status": "import_error", "error": str(e)}

    # =====================================================
    # 2. NEWS & OFFICIAL SOURCES (No session required)
    # =====================================================
    print("\n\n[CATEGORY] NEWS & OFFICIAL SOURCES")
    print("-"*50)

    try:
        from src.utils.tool_factory import create_tool_set
        tools = create_tool_set(include_profile_scrapers=False)

        # Local News
        local_news = tools.get("scrape_local_news")
        if local_news:
            results["scrape_local_news"] = test_tool("scrape_local_news", local_news.invoke, {"keywords": ["sri lanka"], "max_articles": 5})
        else:
            results["scrape_local_news"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # CSE Stock Data
        cse_tool = tools.get("scrape_cse_stock_data")
        if cse_tool:
            results["scrape_cse_stock_data"] = test_tool("scrape_cse_stock_data", cse_tool.invoke, {"symbol": "ASPI", "period": "1d"})
        else:
            results["scrape_cse_stock_data"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # Government Gazette
        gazette_tool = tools.get("scrape_government_gazette")
        if gazette_tool:
            results["scrape_government_gazette"] = test_tool("scrape_government_gazette", gazette_tool.invoke, {"keywords": None, "max_items": 5})
        else:
            results["scrape_government_gazette"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # Parliament Minutes
        parliament_tool = tools.get("scrape_parliament_minutes")
        if parliament_tool:
            results["scrape_parliament_minutes"] = test_tool("scrape_parliament_minutes", parliament_tool.invoke, {"keywords": None, "max_items": 5})
        else:
            results["scrape_parliament_minutes"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # Reddit (no session needed)
        reddit_tool = tools.get("scrape_reddit")
        if reddit_tool:
            results["scrape_reddit"] = test_tool("scrape_reddit", reddit_tool.invoke, {"keywords": ["sri lanka"], "limit": 5})
        else:
            results["scrape_reddit"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

    except Exception as e:
        print(f"[FAIL] ToolSet creation error: {e}")
        results["tool_factory"] = {"status": "failed", "error": str(e)}

    # =====================================================
    # 3. SOCIAL MEDIA TOOLS (Session required)
    # =====================================================
    print("\n\n[CATEGORY] SOCIAL MEDIA TOOLS (Session Required)")
    print("-"*50)

    try:
        from src.utils.tool_factory import create_tool_set
        tools = create_tool_set(include_profile_scrapers=True)

        # Twitter Search
        twitter_tool = tools.get("scrape_twitter")
        if twitter_tool:
            results["scrape_twitter"] = test_tool("scrape_twitter", twitter_tool.invoke, {"query": "sri lanka", "max_items": 3})
        else:
            results["scrape_twitter"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # Facebook Search
        fb_tool = tools.get("scrape_facebook")
        if fb_tool:
            results["scrape_facebook"] = test_tool("scrape_facebook", fb_tool.invoke, {"keywords": ["sri lanka"], "max_items": 3})
        else:
            results["scrape_facebook"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # LinkedIn Search
        linkedin_tool = tools.get("scrape_linkedin")
        if linkedin_tool:
            results["scrape_linkedin"] = test_tool("scrape_linkedin", linkedin_tool.invoke, {"keywords": ["sri lanka"], "max_items": 3})
        else:
            results["scrape_linkedin"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

        # Instagram Search
        instagram_tool = tools.get("scrape_instagram")
        if instagram_tool:
            results["scrape_instagram"] = test_tool("scrape_instagram", instagram_tool.invoke, {"keywords": ["srilanka"], "max_items": 3})
        else:
            results["scrape_instagram"] = {"status": "not_found", "error": "Tool not found in ToolSet"}

    except Exception as e:
        print(f"[FAIL] Social media tools error: {e}")

    # =====================================================
    # 4. PROFILE SCRAPERS (Session required)
    # =====================================================
    print("\n\n[CATEGORY] PROFILE SCRAPERS (Session Required)")
    print("-"*50)

    try:
        from src.utils.profile_scrapers import scrape_twitter_profile, scrape_facebook_profile

        # Twitter Profile
        results["scrape_twitter_profile"] = test_tool("scrape_twitter_profile", scrape_twitter_profile.invoke, {"username": "SLTMobitel", "max_items": 3})

        # Facebook Profile
        results["scrape_facebook_profile"] = test_tool("scrape_facebook_profile", scrape_facebook_profile.invoke, {"profile_url": "https://www.facebook.com/DialogAxiata", "max_items": 3})

    except Exception as e:
        print(f"[FAIL] Profile scrapers error: {e}")

    # =====================================================
    # SUMMARY REPORT
    # =====================================================
    print("\n\n" + "="*70)
    print("[SUMMARY] TEST RESULTS")
    print("="*70)

    success_count = 0
    error_count = 0
    session_issues = []
    other_errors = []

    for tool_name, result in results.items():
        status = result.get("status", "unknown")
        if status == "success":
            success_count += 1
            print(f"[OK] {tool_name}: SUCCESS")
        elif status == "error":
            error = result.get("error", "Unknown error")
            if "session" in error.lower() or "Session" in error:
                session_issues.append(tool_name)
                print(f"[SESSION] {tool_name}: SESSION ISSUE - {error[:50]}")
            else:
                other_errors.append((tool_name, error))
                print(f"[WARN] {tool_name}: ERROR - {error[:50]}")
            error_count += 1
        else:
            error = result.get("error", "Unknown")
            other_errors.append((tool_name, error))
            print(f"[FAIL] {tool_name}: {status.upper()} - {error[:50]}")
            error_count += 1

    print(f"\n[TOTALS]:")
    print(f"   [OK] Successful: {success_count}")
    print(f"   [FAIL] Errors: {error_count}")

    if session_issues:
        print(f"\n[SESSION] TOOLS NEEDING SESSION REFRESH:")
        for tool in session_issues:
            print(f"   - {tool}")

    if other_errors:
        print(f"\n[WARN] TOOLS WITH OTHER ERRORS:")
        for tool, error in other_errors:
            print(f"   - {tool}: {error[:80]}")

    # Save results to file
    with open("tool_test_results.json", "w", encoding="utf-8") as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "summary": {
                "success": success_count,
                "errors": error_count,
                "session_issues": session_issues,
                "other_errors": [t[0] for t in other_errors]
            },
            "details": {k: {"status": v.get("status"), "error": v.get("error")} for k, v in results.items()}
        }, f, indent=2)
    print(f"\n[SAVED] Results saved to: tool_test_results.json")

    return results

if __name__ == "__main__":
    main()
test_ml_pipelines.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
test_ml_pipelines.py
|
| 3 |
+
Test script to verify all 4 ML pipelines are working correctly
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import io
|
| 8 |
+
|
| 9 |
+
# Force UTF-8 output on Windows
|
| 10 |
+
if sys.platform == 'win32':
|
| 11 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
|
| 12 |
+
|
| 13 |
+
# Add project root to path
|
| 14 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 15 |
+
|
| 16 |
+
print("="*70)
|
| 17 |
+
print("[ML PIPELINE TESTING]")
|
| 18 |
+
print("="*70)
|
| 19 |
+
|
| 20 |
+
results = {}
|
| 21 |
+
|
| 22 |
+
# =============================================================================
|
| 23 |
+
# 1. ANOMALY DETECTION PIPELINE
|
| 24 |
+
# =============================================================================
|
| 25 |
+
print("\n" + "="*60)
|
| 26 |
+
print("[1] ANOMALY DETECTION PIPELINE")
|
| 27 |
+
print("="*60)
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
# Check if model exists
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
model_dir = Path(__file__).parent / "models" / "anomaly-detection" / "output"
|
| 33 |
+
models_found = list(model_dir.glob("*.joblib")) if model_dir.exists() else []
|
| 34 |
+
|
| 35 |
+
if models_found:
|
| 36 |
+
print(f"[OK] Found {len(models_found)} trained models:")
|
| 37 |
+
for m in models_found[:3]:
|
| 38 |
+
print(f" - {m.name}")
|
| 39 |
+
|
| 40 |
+
# Try to load and run prediction
|
| 41 |
+
from models.anomaly_detection.src.utils.vectorizer import get_vectorizer
|
| 42 |
+
vectorizer = get_vectorizer()
|
| 43 |
+
print(f"[OK] Vectorizer loaded")
|
| 44 |
+
|
| 45 |
+
import joblib
|
| 46 |
+
model = joblib.load(models_found[0])
|
| 47 |
+
print(f"[OK] Model loaded: {models_found[0].name}")
|
| 48 |
+
|
| 49 |
+
# Test prediction
|
| 50 |
+
test_text = "Breaking news: Major political announcement in Colombo"
|
| 51 |
+
vector = vectorizer.vectorize(test_text, "en")
|
| 52 |
+
prediction = model.predict([vector])[0]
|
| 53 |
+
score = -model.decision_function([vector])[0] if hasattr(model, 'decision_function') else 0
|
| 54 |
+
|
| 55 |
+
print(f"[OK] Test prediction: is_anomaly={prediction==-1}, score={score:.3f}")
|
| 56 |
+
results["anomaly_detection"] = {"status": "success", "models": len(models_found)}
|
| 57 |
+
else:
|
| 58 |
+
print("[WARN] No trained models found. Run training first.")
|
| 59 |
+
print(" Command: python models/anomaly-detection/main.py --mode train")
|
| 60 |
+
results["anomaly_detection"] = {"status": "not_trained"}
|
| 61 |
+
|
| 62 |
+
except Exception as e:
|
| 63 |
+
print(f"[FAIL] Anomaly Detection error: {e}")
|
| 64 |
+
results["anomaly_detection"] = {"status": "error", "error": str(e)}
|
| 65 |
+
|
| 66 |
+
# =============================================================================
|
| 67 |
+
# 2. WEATHER PREDICTION PIPELINE
|
| 68 |
+
# =============================================================================
|
| 69 |
+
print("\n" + "="*60)
|
| 70 |
+
print("[2] WEATHER PREDICTION PIPELINE")
|
| 71 |
+
print("="*60)
|
| 72 |
+
|
| 73 |
+
try:
|
| 74 |
+
from pathlib import Path
|
| 75 |
+
weather_model_dir = Path(__file__).parent / "models" / "weather-prediction" / "artifacts" / "models"
|
| 76 |
+
weather_models = list(weather_model_dir.glob("*.h5")) if weather_model_dir.exists() else []
|
| 77 |
+
|
| 78 |
+
predictions_dir = Path(__file__).parent / "models" / "weather-prediction" / "output" / "predictions"
|
| 79 |
+
prediction_files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []
|
| 80 |
+
|
| 81 |
+
if weather_models:
|
| 82 |
+
print(f"[OK] Found {len(weather_models)} trained LSTM models:")
|
| 83 |
+
for m in weather_models[:5]:
|
| 84 |
+
print(f" - {m.name}")
|
| 85 |
+
|
| 86 |
+
# Check for predictions
|
| 87 |
+
if prediction_files:
|
| 88 |
+
import json
|
| 89 |
+
latest = max(prediction_files, key=lambda p: p.stat().st_mtime)
|
| 90 |
+
with open(latest) as f:
|
| 91 |
+
preds = json.load(f)
|
| 92 |
+
districts = preds.get("districts", {})
|
| 93 |
+
print(f"[OK] Found predictions for {len(districts)} districts")
|
| 94 |
+
print(f" Latest prediction date: {preds.get('prediction_date', 'N/A')}")
|
| 95 |
+
|
| 96 |
+
# Show sample
|
| 97 |
+
if districts:
|
| 98 |
+
sample_district = list(districts.keys())[0]
|
| 99 |
+
sample = districts[sample_district]
|
| 100 |
+
print(f" Sample ({sample_district}):")
|
| 101 |
+
print(f" - Temp: {sample.get('temp_max', 'N/A')}C - {sample.get('temp_min', 'N/A')}C")
|
| 102 |
+
print(f" - Rain: {sample.get('rainfall_mm', 'N/A')}mm")
|
| 103 |
+
|
| 104 |
+
results["weather_prediction"] = {"status": "success", "models": len(weather_models), "districts": len(districts)}
|
| 105 |
+
else:
|
| 106 |
+
print("[WARN] No prediction files found. Run predictor.")
|
| 107 |
+
results["weather_prediction"] = {"status": "models_only", "models": len(weather_models)}
|
| 108 |
+
else:
|
| 109 |
+
print("[WARN] No trained models found")
|
| 110 |
+
print(" Command: python models/weather-prediction/main.py --mode train")
|
| 111 |
+
results["weather_prediction"] = {"status": "not_trained"}
|
| 112 |
+
|
| 113 |
+
except Exception as e:
|
| 114 |
+
print(f"[FAIL] Weather Prediction error: {e}")
|
| 115 |
+
results["weather_prediction"] = {"status": "error", "error": str(e)}
|
| 116 |
+
|
| 117 |
+
# =============================================================================
|
| 118 |
+
# 3. CURRENCY PREDICTION PIPELINE
|
| 119 |
+
# =============================================================================
|
| 120 |
+
print("\n" + "="*60)
|
| 121 |
+
print("[3] CURRENCY PREDICTION PIPELINE (USD/LKR)")
|
| 122 |
+
print("="*60)
|
| 123 |
+
|
| 124 |
+
try:
|
| 125 |
+
from pathlib import Path
|
| 126 |
+
currency_model_dir = Path(__file__).parent / "models" / "currency-volatility-prediction" / "artifacts" / "models"
|
| 127 |
+
currency_model = currency_model_dir / "gru_usd_lkr.h5" if currency_model_dir.exists() else None
|
| 128 |
+
|
| 129 |
+
predictions_dir = Path(__file__).parent / "models" / "currency-volatility-prediction" / "output" / "predictions"
|
| 130 |
+
prediction_files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []
|
| 131 |
+
|
| 132 |
+
if currency_model and currency_model.exists():
|
| 133 |
+
print(f"[OK] Found GRU model: {currency_model.name}")
|
| 134 |
+
|
| 135 |
+
# Check for predictions
|
| 136 |
+
if prediction_files:
|
| 137 |
+
import json
|
| 138 |
+
latest = max(prediction_files, key=lambda p: p.stat().st_mtime)
|
| 139 |
+
with open(latest) as f:
|
| 140 |
+
pred = json.load(f)
|
| 141 |
+
|
| 142 |
+
print(f"[OK] Latest prediction found:")
|
| 143 |
+
print(f" - Current Rate: {pred.get('current_rate', 'N/A')} LKR")
|
| 144 |
+
print(f" - Predicted: {pred.get('predicted_rate', 'N/A')} LKR")
|
| 145 |
+
print(f" - Change: {pred.get('change_percent', 'N/A')}%")
|
| 146 |
+
print(f" - Direction: {pred.get('direction', 'N/A')}")
|
| 147 |
+
|
| 148 |
+
results["currency_prediction"] = {"status": "success", "rate": pred.get("predicted_rate")}
|
| 149 |
+
else:
|
| 150 |
+
print("[WARN] No prediction files found")
|
| 151 |
+
results["currency_prediction"] = {"status": "model_only"}
|
| 152 |
+
else:
|
| 153 |
+
print("[WARN] No trained model found")
|
| 154 |
+
print(" Command: python models/currency-volatility-prediction/main.py --mode train")
|
| 155 |
+
results["currency_prediction"] = {"status": "not_trained"}
|
| 156 |
+
|
| 157 |
+
except Exception as e:
|
| 158 |
+
print(f"[FAIL] Currency Prediction error: {e}")
|
| 159 |
+
results["currency_prediction"] = {"status": "error", "error": str(e)}
|
| 160 |
+
|
| 161 |
+
# =============================================================================
|
| 162 |
+
# 4. STOCK PRICE PREDICTION PIPELINE
|
| 163 |
+
# =============================================================================
|
| 164 |
+
print("\n" + "="*60)
|
| 165 |
+
print("[4] STOCK PRICE PREDICTION PIPELINE")
|
| 166 |
+
print("="*60)
|
| 167 |
+
|
| 168 |
+
try:
|
| 169 |
+
from pathlib import Path
|
| 170 |
+
stock_model_dir = Path(__file__).parent / "models" / "stock-price-prediction" / "artifacts" / "models"
|
| 171 |
+
stock_models = list(stock_model_dir.glob("*.h5")) if stock_model_dir.exists() else []
|
| 172 |
+
|
| 173 |
+
predictions_dir = Path(__file__).parent / "models" / "stock-price-prediction" / "output" / "predictions"
|
| 174 |
+
prediction_files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []
|
| 175 |
+
|
| 176 |
+
if stock_models:
|
| 177 |
+
print(f"[OK] Found {len(stock_models)} stock models:")
|
| 178 |
+
for m in stock_models[:5]:
|
| 179 |
+
print(f" - {m.name}")
|
| 180 |
+
|
| 181 |
+
# Check for predictions
|
| 182 |
+
if prediction_files:
|
| 183 |
+
import json
|
| 184 |
+
latest = max(prediction_files, key=lambda p: p.stat().st_mtime)
|
| 185 |
+
with open(latest) as f:
|
| 186 |
+
preds = json.load(f)
|
| 187 |
+
|
| 188 |
+
stocks = preds.get("stocks", preds.get("predictions", {}))
|
| 189 |
+
print(f"[OK] Found predictions for {len(stocks)} stocks")
|
| 190 |
+
|
| 191 |
+
# Show sample
|
| 192 |
+
if stocks:
|
| 193 |
+
sample_stock = list(stocks.keys())[0] if isinstance(stocks, dict) else stocks[0]
|
| 194 |
+
if isinstance(stocks, dict):
|
| 195 |
+
sample = stocks[sample_stock]
|
| 196 |
+
print(f" Sample ({sample_stock}):")
|
| 197 |
+
print(f" - Current: {sample.get('current_price', 'N/A')}")
|
| 198 |
+
print(f" - Predicted: {sample.get('predicted_price', 'N/A')}")
|
| 199 |
+
|
| 200 |
+
results["stock_prediction"] = {"status": "success", "models": len(stock_models), "stocks": len(stocks)}
|
| 201 |
+
else:
|
| 202 |
+
print("[WARN] No prediction files found")
|
| 203 |
+
results["stock_prediction"] = {"status": "models_only", "models": len(stock_models)}
|
| 204 |
+
else:
|
| 205 |
+
print("[WARN] No trained models found")
|
| 206 |
+
print(" Command: python models/stock-price-prediction/main.py --mode train")
|
| 207 |
+
results["stock_prediction"] = {"status": "not_trained"}
|
| 208 |
+
|
| 209 |
+
except Exception as e:
|
| 210 |
+
print(f"[FAIL] Stock Prediction error: {e}")
|
| 211 |
+
results["stock_prediction"] = {"status": "error", "error": str(e)}
|
| 212 |
+
|
| 213 |
+
# =============================================================================
|
| 214 |
+
# SUMMARY
|
| 215 |
+
# =============================================================================
|
| 216 |
+
print("\n" + "="*70)
|
| 217 |
+
print("[SUMMARY] ML PIPELINE STATUS")
|
| 218 |
+
print("="*70)
|
| 219 |
+
|
| 220 |
+
for pipeline, result in results.items():
|
| 221 |
+
status = result.get("status", "unknown")
|
| 222 |
+
if status == "success":
|
| 223 |
+
print(f"[OK] {pipeline}: Working")
|
| 224 |
+
elif status == "not_trained":
|
| 225 |
+
print(f"[WARN] {pipeline}: Not trained yet")
|
| 226 |
+
elif status in ["model_only", "models_only"]:
|
| 227 |
+
print(f"[WARN] {pipeline}: Model exists, no recent predictions")
|
| 228 |
+
else:
|
| 229 |
+
print(f"[FAIL] {pipeline}: {result.get('error', status)}")
|
| 230 |
+
|
| 231 |
+
print("="*70)
|
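Note: the pipeline checks above each repeat the same "load the newest prediction JSON" lookup. A minimal sketch of how that could be factored into one helper; the `load_latest_prediction` name and signature are hypothetical, not part of this commit:

```python
# Hypothetical helper (not in this commit): one place for the
# "newest *.json prediction" lookup repeated per pipeline above.
import json
from pathlib import Path
from typing import Optional

def load_latest_prediction(predictions_dir: Path) -> Optional[dict]:
    """Return the most recently written prediction JSON, or None if absent."""
    files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []
    if not files:
        return None
    latest = max(files, key=lambda p: p.stat().st_mtime)  # newest by modification time
    with open(latest) as f:
        return json.load(f)
```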
test_news_tools.py
ADDED
@@ -0,0 +1,117 @@
"""
test_news_tools.py
Test for news and official source tools (no social media sessions required)
"""
import sys
import os
import io
import json

# Force UTF-8 output
if sys.platform == 'win32':
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

print("="*60)
print("[TEST] NEWS & OFFICIAL SOURCE TOOLS")
print("="*60)

try:
    from src.utils.tool_factory import create_tool_set
    print("[OK] Tool factory imported")
    tools = create_tool_set(include_profile_scrapers=False)
    print("[OK] ToolSet created")
except Exception as e:
    print(f"[FAIL] Could not create ToolSet: {e}")
    sys.exit(1)

# Test 1: Local News
print("\n[1] Testing scrape_local_news...")
try:
    local_news = tools.get("scrape_local_news")
    if local_news:
        result = local_news.invoke({"keywords": ["sri lanka"], "max_articles": 3})
        parsed = json.loads(result) if isinstance(result, str) else result
        if "error" in parsed:
            print(f"[WARN] Local News returned error: {parsed['error']}")
        else:
            count = len(parsed.get('results', []))
            print(f"[OK] Local News: {count} articles fetched")
            if count > 0:
                print(f"   Sample: {str(parsed['results'][0])[:150]}...")
    else:
        print("[WARN] scrape_local_news not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Local News: {e}")

# Test 2: CSE Stock Data
print("\n[2] Testing scrape_cse_stock_data...")
try:
    cse_tool = tools.get("scrape_cse_stock_data")
    if cse_tool:
        result = cse_tool.invoke({"symbol": "ASPI", "period": "1d"})
        parsed = json.loads(result) if isinstance(result, str) else result
        if "error" in parsed:
            print(f"[WARN] CSE Stock returned error: {parsed['error']}")
        else:
            print(f"[OK] CSE Stock: {str(parsed)[:200]}...")
    else:
        print("[WARN] scrape_cse_stock_data not found in ToolSet")
except Exception as e:
    print(f"[FAIL] CSE Stock: {e}")

# Test 3: Government Gazette
print("\n[3] Testing scrape_government_gazette...")
try:
    gazette_tool = tools.get("scrape_government_gazette")
    if gazette_tool:
        result = gazette_tool.invoke({"keywords": None, "max_items": 3})
        parsed = json.loads(result) if isinstance(result, str) else result
        if "error" in parsed:
            print(f"[WARN] Gazette returned error: {parsed['error']}")
        else:
            count = len(parsed.get('results', []))
            print(f"[OK] Gazette: {count} items fetched")
    else:
        print("[WARN] scrape_government_gazette not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Gazette: {e}")

# Test 4: Parliament Minutes
print("\n[4] Testing scrape_parliament_minutes...")
try:
    parliament_tool = tools.get("scrape_parliament_minutes")
    if parliament_tool:
        result = parliament_tool.invoke({"keywords": None, "max_items": 3})
        parsed = json.loads(result) if isinstance(result, str) else result
        if "error" in parsed:
            print(f"[WARN] Parliament returned error: {parsed['error']}")
        else:
            count = len(parsed.get('results', []))
            print(f"[OK] Parliament: {count} items fetched")
    else:
        print("[WARN] scrape_parliament_minutes not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Parliament: {e}")

# Test 5: Reddit (no auth needed)
print("\n[5] Testing scrape_reddit...")
try:
    reddit_tool = tools.get("scrape_reddit")
    if reddit_tool:
        result = reddit_tool.invoke({"keywords": ["sri lanka"], "limit": 3})
        parsed = json.loads(result) if isinstance(result, str) else result
        if "error" in parsed:
            print(f"[WARN] Reddit returned error: {parsed['error']}")
        else:
            count = len(parsed.get('results', parsed.get('posts', [])))
            print(f"[OK] Reddit: {count} posts fetched")
    else:
        print("[WARN] scrape_reddit not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Reddit: {e}")

print("\n" + "="*60)
print("[DONE] News & official source tools test complete")
print("="*60)
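Note: the five tests above repeat the same invoke/parse/report steps. A minimal sketch of a shared runner, assuming tools return either a JSON string or a dict with a `results` list as in the script above; the `run_tool_test` helper is hypothetical, not part of this commit:

```python
import json

# Hypothetical helper: shared invoke/parse/report loop for the tests above.
def run_tool_test(tools, name: str, args: dict) -> None:
    tool = tools.get(name)
    if tool is None:
        print(f"[WARN] {name} not found in ToolSet")
        return
    try:
        result = tool.invoke(args)
        parsed = json.loads(result) if isinstance(result, str) else result
        if isinstance(parsed, dict) and "error" in parsed:
            print(f"[WARN] {name} returned error: {parsed['error']}")
        else:
            count = len(parsed.get("results", [])) if isinstance(parsed, dict) else len(parsed)
            print(f"[OK] {name}: {count} items fetched")
    except Exception as e:
        print(f"[FAIL] {name}: {e}")
```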
test_social_tools.py
ADDED
@@ -0,0 +1,185 @@
"""
test_social_tools.py
Test for social media tools (session required).
Identifies which sessions need to be created or refreshed.
"""
import sys
import os
import io
import json

# Force UTF-8 output
if sys.platform == 'win32':
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

print("="*60)
print("[TEST] SOCIAL MEDIA TOOLS (Session Required)")
print("="*60)

# Check for existing sessions
print("\n[SESSIONS] Checking for existing session files...")
session_dirs = [
    "src/utils/.sessions",
    ".sessions",
]

session_files = {}
for directory in session_dirs:  # avoid shadowing the built-in `dir`
    if os.path.exists(directory):
        for f in os.listdir(directory):
            if f.endswith('.json'):
                session_files[f] = os.path.join(directory, f)

if session_files:
    print(f"[OK] Found {len(session_files)} session files:")
    for name, path in session_files.items():
        print(f"   - {name}: {path}")
else:
    print("[WARN] No session files found!")
    print("   Please create sessions using the session manager")

# Try to create ToolSet with profile scrapers
print("\n[TEST] Creating ToolSet with profile scrapers...")
try:
    from src.utils.tool_factory import create_tool_set
    tools = create_tool_set(include_profile_scrapers=True)
    print("[OK] ToolSet created")
except Exception as e:
    print(f"[FAIL] Could not create ToolSet: {e}")
    sys.exit(1)

# Test Twitter
print("\n[1] Testing scrape_twitter (keyword search)...")
try:
    twitter_tool = tools.get("scrape_twitter")
    if twitter_tool:
        result = twitter_tool.invoke({"query": "sri lanka", "max_items": 2})
        parsed = json.loads(result) if isinstance(result, str) else result
        if isinstance(parsed, dict) and "error" in parsed:
            if "session" in str(parsed['error']).lower():
                print("[SESSION] Twitter: Session not found or expired")
                print(f"   Error: {parsed['error'][:100]}")
            else:
                print(f"[WARN] Twitter error: {parsed['error'][:100]}")
        elif isinstance(parsed, dict):
            count = len(parsed.get('results', []))
            print(f"[OK] Twitter: {count} tweets fetched")
        elif isinstance(parsed, list):
            print(f"[OK] Twitter: {len(parsed)} tweets fetched")
        else:
            print(f"[OK] Twitter returned: {type(parsed)}")
    else:
        print("[WARN] scrape_twitter not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Twitter: {e}")

# Test Facebook
print("\n[2] Testing scrape_facebook (keyword search)...")
try:
    fb_tool = tools.get("scrape_facebook")
    if fb_tool:
        result = fb_tool.invoke({"keywords": ["sri lanka"], "max_items": 2})
        parsed = json.loads(result) if isinstance(result, str) else result
        if isinstance(parsed, dict) and "error" in parsed:
            if "session" in str(parsed['error']).lower():
                print("[SESSION] Facebook: Session not found or expired")
                print(f"   Error: {parsed['error'][:100]}")
            else:
                print(f"[WARN] Facebook error: {parsed['error'][:100]}")
        elif isinstance(parsed, dict):
            count = len(parsed.get('results', []))
            print(f"[OK] Facebook: {count} posts fetched")
        elif isinstance(parsed, list):
            print(f"[OK] Facebook: {len(parsed)} posts fetched")
        else:
            print(f"[OK] Facebook returned: {type(parsed)}")
    else:
        print("[WARN] scrape_facebook not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Facebook: {e}")

# Test LinkedIn
print("\n[3] Testing scrape_linkedin (keyword search)...")
try:
    linkedin_tool = tools.get("scrape_linkedin")
    if linkedin_tool:
        result = linkedin_tool.invoke({"keywords": ["sri lanka"], "max_items": 2})
        parsed = json.loads(result) if isinstance(result, str) else result
        if isinstance(parsed, dict) and "error" in parsed:
            if "session" in str(parsed['error']).lower():
                print("[SESSION] LinkedIn: Session not found or expired")
                print(f"   Error: {parsed['error'][:100]}")
            else:
                print(f"[WARN] LinkedIn error: {parsed['error'][:100]}")
        elif isinstance(parsed, dict):
            count = len(parsed.get('results', []))
            print(f"[OK] LinkedIn: {count} posts fetched")
        elif isinstance(parsed, list):
            print(f"[OK] LinkedIn: {len(parsed)} posts fetched")
        else:
            print(f"[OK] LinkedIn returned: {type(parsed)}")
    else:
        print("[WARN] scrape_linkedin not found in ToolSet")
except Exception as e:
    print(f"[FAIL] LinkedIn: {e}")

# Test Instagram
print("\n[4] Testing scrape_instagram (hashtag search)...")
try:
    instagram_tool = tools.get("scrape_instagram")
    if instagram_tool:
        result = instagram_tool.invoke({"keywords": ["srilanka"], "max_items": 2})
        parsed = json.loads(result) if isinstance(result, str) else result
        if isinstance(parsed, dict) and "error" in parsed:
            if "session" in str(parsed['error']).lower():
                print("[SESSION] Instagram: Session not found or expired")
                print(f"   Error: {parsed['error'][:100]}")
            else:
                print(f"[WARN] Instagram error: {parsed['error'][:100]}")
        elif isinstance(parsed, dict):
            count = len(parsed.get('results', []))
            print(f"[OK] Instagram: {count} posts fetched")
        elif isinstance(parsed, list):
            print(f"[OK] Instagram: {len(parsed)} posts fetched")
        else:
            print(f"[OK] Instagram returned: {type(parsed)}")
    else:
        print("[WARN] scrape_instagram not found in ToolSet")
except Exception as e:
    print(f"[FAIL] Instagram: {e}")

# Test Profile Scrapers
print("\n[5] Testing scrape_twitter_profile (specific account)...")
try:
    from src.utils.profile_scrapers import scrape_twitter_profile
    result = scrape_twitter_profile.invoke({"username": "SLTMobitel", "max_items": 2})
    parsed = json.loads(result) if isinstance(result, str) else result
    if isinstance(parsed, dict) and "error" in parsed:
        if "session" in str(parsed['error']).lower():
            print("[SESSION] Twitter Profile: Session not found or expired")
            print(f"   Error: {parsed['error'][:100]}")
        elif "timeout" in str(parsed['error']).lower():
            print("[TIMEOUT] Twitter Profile: Navigation timed out (X blocks automation)")
            print(f"   Error: {parsed['error'][:100]}")
        else:
            print(f"[WARN] Twitter Profile error: {parsed['error'][:100]}")
    elif isinstance(parsed, dict):
        count = len(parsed.get('results', []))
        print(f"[OK] Twitter Profile: {count} tweets fetched from @SLTMobitel")
    else:
        print(f"[OK] Twitter Profile returned: {type(parsed)}")
except Exception as e:
    print(f"[FAIL] Twitter Profile: {e}")

print("\n" + "="*60)
print("[SUMMARY]")
print("="*60)
print("If you see [SESSION] errors, please create new sessions using:")
print("   - Twitter: Run session manager with Twitter login")
print("   - Facebook: Run session manager with Facebook login")
print("   - LinkedIn: Run session manager with LinkedIn login")
print("   - Instagram: Run session manager with Instagram login")
print("\nSession manager: python src/utils/session_manager.py")
print("="*60)
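Note: [SESSION] failures are often stale cookies rather than missing files, so a pre-flight age check could flag them before any scraper runs. A sketch under the assumption that sessions are the JSON files discovered above and that roughly a week is a reasonable expiry window; both the threshold and the `stale_sessions` helper are assumptions, not part of this commit:

```python
import os
import time

MAX_AGE_SECONDS = 7 * 24 * 3600  # assumed expiry window; tune per platform

def stale_sessions(session_files: dict) -> list:
    """Return names of session JSONs whose last modification exceeds the window."""
    now = time.time()
    return [
        name
        for name, path in session_files.items()
        if now - os.path.getmtime(path) > MAX_AGE_SECONDS
    ]
```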
test_weather_tools.py
ADDED
@@ -0,0 +1,57 @@
"""
test_weather_tools.py
Quick test for weather and flood tools only (no sessions required)
"""
import sys
import os
import io

# Force UTF-8 output
if sys.platform == 'win32':
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

print("="*60)
print("[TEST] WEATHER & FLOOD TOOLS")
print("="*60)

# Test 1: DMC Alerts
print("\n[1] Testing tool_dmc_alerts...")
try:
    from src.utils.utils import tool_dmc_alerts
    result = tool_dmc_alerts()
    print(f"[OK] DMC Alerts: {str(result)[:300]}...")
except Exception as e:
    print(f"[FAIL] DMC Alerts: {e}")

# Test 2: Weather Nowcast
print("\n[2] Testing tool_weather_nowcast...")
try:
    from src.utils.utils import tool_weather_nowcast
    result = tool_weather_nowcast()
    print(f"[OK] Weather Nowcast: {str(result)[:300]}...")
except Exception as e:
    print(f"[FAIL] Weather Nowcast: {e}")

# Test 3: District Weather
print("\n[3] Testing tool_district_weather...")
try:
    from src.utils.utils import tool_district_weather
    result = tool_district_weather("colombo")
    print(f"[OK] District Weather: {str(result)[:300]}...")
except Exception as e:
    print(f"[FAIL] District Weather: {e}")

# Test 4: RiverNet (may take longer - uses Playwright)
print("\n[4] Testing tool_rivernet_status (may take 30-60 seconds)...")
try:
    from src.utils.utils import tool_rivernet_status
    result = tool_rivernet_status()
    print(f"[OK] RiverNet: {str(result)[:300]}...")
except Exception as e:
    print(f"[FAIL] RiverNet: {e}")

print("\n" + "="*60)
print("[DONE] Weather tools test complete")
print("="*60)
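Note: since tool_rivernet_status can take 30-60 seconds under Playwright, timing each call would make slow regressions visible. A minimal sketch; the `timed` wrapper is illustrative, not part of this commit:

```python
import time

def timed(fn, label: str, *args):
    """Call one tool function and report wall-clock time in the [OK]/[FAIL] convention."""
    start = time.perf_counter()
    try:
        result = fn(*args)
        print(f"[OK] {label} in {time.perf_counter() - start:.1f}s: {str(result)[:120]}...")
    except Exception as e:
        print(f"[FAIL] {label} after {time.perf_counter() - start:.1f}s: {e}")

# Example usage: timed(tool_rivernet_status, "RiverNet")
```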
tool_test_results.json
ADDED
@@ -0,0 +1,73 @@
{
  "timestamp": "2025-12-08T14:41:26.602415",
  "summary": {
    "success": 14,
    "errors": 1,
    "session_issues": [],
    "other_errors": [
      "scrape_twitter_profile"
    ]
  },
  "details": {
    "tool_dmc_alerts": {
      "status": "success",
      "error": null
    },
    "tool_weather_nowcast": {
      "status": "success",
      "error": null
    },
    "tool_rivernet_status": {
      "status": "success",
      "error": null
    },
    "tool_district_weather": {
      "status": "success",
      "error": null
    },
    "scrape_local_news": {
      "status": "success",
      "error": null
    },
    "scrape_cse_stock_data": {
      "status": "success",
      "error": null
    },
    "scrape_government_gazette": {
      "status": "success",
      "error": null
    },
    "scrape_parliament_minutes": {
      "status": "success",
      "error": null
    },
    "scrape_reddit": {
      "status": "success",
      "error": null
    },
    "scrape_twitter": {
      "status": "success",
      "error": null
    },
    "scrape_facebook": {
      "status": "success",
      "error": null
    },
    "scrape_linkedin": {
      "status": "success",
      "error": null
    },
    "scrape_instagram": {
      "status": "success",
      "error": null
    },
    "scrape_twitter_profile": {
      "status": "error",
      "error": "Profile not found or private: @SLTMobitel"
    },
    "scrape_facebook_profile": {
      "status": "success",
      "error": null
    }
  }
}
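Note: the report above can also be consumed programmatically, for example to fail a CI job when any tool regresses. A minimal sketch that reads the structure shown:

```python
import json

# Minimal sketch: surface failing tools from tool_test_results.json.
with open("tool_test_results.json") as f:
    report = json.load(f)

failing = [name for name, detail in report["details"].items()
           if detail["status"] != "success"]
print(f"{report['summary']['success']} passing, {len(failing)} failing: {failing}")
```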