import requests
import pandas as pd
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import json
# Commenting out blockchain-related imports that cause loading issues
# from web3 import Web3
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import random
import logging
from typing import List, Dict, Any, Optional
# Comment out the import for now and replace with dummy functions
# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
# APR visualization functions integrated directly
from fetch_and_preprocess_data import generate_continuous_random_data
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import (
load_apr_data_from_csv,
load_roi_data_from_csv,
load_statistics_from_csv,
check_csv_data_availability,
get_data_freshness_info
)
# Set up logging with appropriate verbosity
logging.basicConfig(
level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler("app_debug.log"), # Log to file for persistence
logging.StreamHandler() # Also log to console
]
)
logger = logging.getLogger(__name__)
# Reduce third-party library logging
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)
# Log the startup information
logger.info("============= APPLICATION STARTING =============")
logger.info(f"Running from directory: {os.getcwd()}")
# Global variables to store the data for reuse
global_df = None
global_roi_df = None
global_dummy_apr_df = None # Store dummy APR data separately
global_dummy_roi_df = None # Store dummy ROI data separately
# Configuration
API_BASE_URL = "https://afmdb.autonolas.tech"
logger.info(f"Using API endpoint: {API_BASE_URL}")
def get_agent_type_by_name(type_name: str) -> Optional[Dict[str, Any]]:
"""Get agent type by name"""
url = f"{API_BASE_URL}/api/agent-types/name/{type_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Agent type '{type_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Agent type response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_agent_type_by_name: {e}")
return None
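# Example usage (mirroring how the result is consumed in fetch_apr_data_from_db below):
#   optimus_type = get_agent_type_by_name("Optimus")
#   type_id = optimus_type["type_id"] if optimus_type else None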
def get_attribute_definition_by_name(attr_name: str) -> Optional[Dict[str, Any]]:
"""Get attribute definition by name"""
url = f"{API_BASE_URL}/api/attributes/name/{attr_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Attribute definition '{attr_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Attribute definition response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_attribute_definition_by_name: {e}")
return None
def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]:
"""Get all agents of a specific type"""
url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"No agents found for type ID {type_id}")
return []
response.raise_for_status()
result = response.json()
logger.debug(f"Agents count: {len(result)}")
logger.debug(f"First few agents: {result[:2] if result else []}")
return result
except Exception as e:
logger.error(f"Error in get_agents_by_type: {e}")
return []
def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]:
"""Get all attribute values for a specific attribute definition across all agents of a given list"""
all_attributes = []
logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}")
# For each agent, get their attributes and filter for the one we want
for agent in agents:
agent_id = agent["agent_id"]
# Call the /api/agents/{agent_id}/attributes/ endpoint
url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/"
logger.debug(f"Calling API for agent {agent_id}: {url}")
try:
response = requests.get(url, params={"limit": 1000})
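            # Assumption: 1000 attributes per agent is sufficient; if the API
            # paginates beyond this limit, later pages are not fetched here.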
if response.status_code == 404:
logger.error(f"No attributes found for agent ID {agent_id}")
continue
response.raise_for_status()
agent_attrs = response.json()
logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes")
# Filter for the specific attribute definition ID
filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id]
logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes")
if filtered_attrs:
logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}")
all_attributes.extend(filtered_attrs)
except requests.exceptions.RequestException as e:
logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}")
logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}")
return all_attributes
def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str:
"""Get agent name from agent ID"""
for agent in agents:
if agent["agent_id"] == agent_id:
return agent["agent_name"]
return "Unknown"
def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]:
"""Extract APR value, adjusted APR value, ROI value, and timestamp from JSON value"""
try:
agent_id = attr.get("agent_id", "unknown")
logger.debug(f"Extracting APR value for agent {agent_id}")
# The APR value is stored in the json_value field
if attr["json_value"] is None:
logger.debug(f"Agent {agent_id}: json_value is None")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False}
# If json_value is a string, parse it
if isinstance(attr["json_value"], str):
logger.debug(f"Agent {agent_id}: json_value is string, parsing")
json_data = json.loads(attr["json_value"])
else:
json_data = attr["json_value"]
apr = json_data.get("apr")
adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present
timestamp = json_data.get("timestamp")
address = json_data.get("portfolio_snapshot", {}).get("portfolio", {}).get("address")
# Extract ROI (f_i_ratio) from calculation_metrics if it exists
roi = None
if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None:
roi = json_data["calculation_metrics"].get("f_i_ratio")
# Filter ROI values to -10 to 10 range
if roi is not None and (roi < -10 or roi > 10):
roi = None # Exclude ROI values outside the range
logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}")
# Convert timestamp to datetime if it exists
timestamp_dt = None
if timestamp:
timestamp_dt = datetime.fromtimestamp(timestamp)
result = json_data.copy() # Copy the original JSON data for logging
result.update({
"apr": apr,
"adjusted_apr": adjusted_apr,
"roi": roi,
"timestamp": timestamp_dt,
"agent_id": agent_id,
"is_dummy": False,
"address": address
})
logger.debug(f"Agent {agent_id}: Extracted result: {result}")
return result
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}")
logger.error(f"Problematic json_value: {attr.get('json_value')}")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False, "address": None}
def fetch_apr_data_from_db():
"""
Fetch APR data from database using the API.
"""
global global_df
global global_roi_df
logger.info("==== Starting APR data fetch ====")
try:
# Step 1: Find the Optimus agent type
logger.info("Finding Optimus agent type")
optimus_type = get_agent_type_by_name("Optimus")
if not optimus_type:
logger.error("Optimus agent type not found, using placeholder data")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
type_id = optimus_type["type_id"]
logger.info(f"Found Optimus agent type with ID: {type_id}")
# Step 2: Find the APR attribute definition
logger.info("Finding APR attribute definition")
apr_attr_def = get_attribute_definition_by_name("APR")
if not apr_attr_def:
logger.error("APR attribute definition not found, using placeholder data")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
attr_def_id = apr_attr_def["attr_def_id"]
logger.info(f"Found APR attribute definition with ID: {attr_def_id}")
# Step 3: Get all agents of type Optimus
logger.info(f"Getting all agents of type Optimus (type_id: {type_id})")
optimus_agents = get_agents_by_type(type_id)
if not optimus_agents:
logger.error("No agents of type 'Optimus' found")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
logger.info(f"Found {len(optimus_agents)} Optimus agents")
logger.debug(f"Optimus agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in optimus_agents]}")
# Step 4: Fetch all APR values for Optimus agents
logger.info(f"Fetching APR values for all Optimus agents (attr_def_id: {attr_def_id})")
apr_attributes = get_attribute_values_by_type_and_attr(optimus_agents, attr_def_id)
if not apr_attributes:
logger.error("No APR values found for 'Optimus' agents")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
logger.info(f"Found {len(apr_attributes)} APR attributes total")
# Step 5: Extract APR and ROI data
logger.info("Extracting APR and ROI data from attributes")
apr_data_list = []
roi_data_list = []
for attr in apr_attributes:
data = extract_apr_value(attr)
if data["timestamp"] is not None:
# Get agent name
agent_name = get_agent_name(attr["agent_id"], optimus_agents)
# Add agent name to the data
data["agent_name"] = agent_name
# Add is_dummy flag (all real data)
data["is_dummy"] = False
# Process APR data
if data["apr"] is not None:
# Include all APR values (including negative ones) EXCEPT zero and -100
if data["apr"] != 0 and data["apr"] != -100:
apr_entry = data.copy()
apr_entry["metric_type"] = "APR"
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}")
# Add to the APR data list
apr_data_list.append(apr_entry)
else:
# Log that we're skipping zero or -100 values
logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)")
# Process ROI data
if data["roi"] is not None:
# Include all ROI values
roi_entry = {
"roi": data["roi"],
"timestamp": data["timestamp"],
"agent_id": data["agent_id"],
"agent_name": agent_name,
"is_dummy": False,
"metric_type": "ROI"
}
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}")
# Add to the ROI data list
roi_data_list.append(roi_entry)
logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points")
        # Debug: check adjusted APR data availability after May 10th, 2025
may_10_2025 = datetime(2025, 5, 10)
after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Log detailed information about when data began
first_adjusted = None
if with_adjusted_after_may_10:
first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
# Check all data for first adjusted_apr
all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
if all_with_adjusted:
first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
# Calculate overall coverage
adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
# Log per-agent adjusted APR statistics
agent_stats = {}
for record in apr_data_list:
agent_id = record['agent_id']
has_adjusted = record['adjusted_apr'] is not None
if agent_id not in agent_stats:
agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
agent_stats[agent_id]['total'] += 1
if has_adjusted:
agent_stats[agent_id]['adjusted'] += 1
# Log stats for agents with meaningful data
for agent_id, stats in agent_stats.items():
if stats['total'] > 0:
coverage = (stats['adjusted'] / stats['total']) * 100
if coverage > 0: # Only log agents that have at least some adjusted data
logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
# Check for gaps in adjusted APR data
for agent_id in agent_stats:
# Get all records for this agent
agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
# Sort by timestamp
agent_records.sort(key=lambda x: x['timestamp'])
# Find where adjusted APR starts and if there are gaps
has_adjusted = False
gap_count = 0
streak_length = 0
for record in agent_records:
if record['adjusted_apr'] is not None:
if not has_adjusted:
has_adjusted = True
logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
streak_length += 1
elif has_adjusted:
# We had adjusted data but now it's missing
gap_count += 1
if streak_length > 0:
logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
streak_length = 0
if gap_count > 0:
logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
elif has_adjusted:
logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
# Provide summary statistics
agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
agents_with_gaps = sum(1 for agent_id in agent_stats if
any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and
i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and
apr_data_list[i+1]['adjusted_apr'] is None
for i in range(len(apr_data_list)-1)))
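        # Note: this summary assumes apr_data_list keeps each agent's records
        # adjacent and in fetch order (they are appended per agent above); if
        # that ordering ever changes, the consecutive-index comparison breaks.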
logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
if agents_with_gaps > 0:
logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
else:
logger.info("No gaps detected in adjusted APR data - graph should be continuous")
if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
# Log agent IDs with missing adjusted_apr after May 10th
agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
# Check these same agents before May 10th
before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
# Agents that had adjusted_apr before but not after
missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
if missing_adjusted:
logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
# Find the last valid adjusted_apr date for these agents
for agent_id in missing_adjusted:
agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
if agent_data:
last_entry = max(agent_data, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
# Look at the first entry after the cutoff without adjusted_apr
agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
if agent_after:
first_after = min(agent_after, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
# If the agent data has the 'adjusted_apr_key' field, log that info
if 'adjusted_apr_key' in first_after:
logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
# Add debug logic to check for any adjusted_apr after May 10th and which agents have it
elif len(with_adjusted_after_may_10) > 0:
logger.info("Found adjusted_apr values after May 10th, 2025")
# Group by agent and log
agent_counts = {}
for item in with_adjusted_after_may_10:
agent_id = item['agent_id']
if agent_id in agent_counts:
agent_counts[agent_id] += 1
else:
agent_counts[agent_id] = 1
logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
# Log adjusted_apr keys used
keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
if keys_used:
logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
# Convert to DataFrames
if not apr_data_list:
logger.error("No valid APR data extracted")
global_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for APR
global_df = pd.DataFrame(apr_data_list)
if not roi_data_list:
logger.error("No valid ROI data extracted")
global_roi_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for ROI
global_roi_df = pd.DataFrame(roi_data_list)
# Handle dummy data generation
global global_dummy_apr_df
global global_dummy_roi_df
logger.info("Handling dummy data...")
# Generate dummy APR data only if needed
if not global_df.empty:
# Check if we already have dummy data
if global_dummy_apr_df is None:
# First time - generate all dummy data
logger.info("Generating initial dummy APR data...")
global_dummy_apr_df = generate_continuous_random_data(global_df)
# Only keep APR data
if not global_dummy_apr_df.empty:
global_dummy_apr_df = global_dummy_apr_df[global_dummy_apr_df['metric_type'] == 'APR']
logger.info(f"Generated {len(global_dummy_apr_df)} initial dummy APR data points")
else:
# We already have dummy data - check if we need to generate more
# Find the latest timestamp in the real data
latest_real_timestamp = global_df['timestamp'].max()
# Find the latest timestamp in the dummy data
latest_dummy_timestamp = global_dummy_apr_df['timestamp'].max() if not global_dummy_apr_df.empty else None
# If the real data has newer timestamps, generate more dummy data
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
logger.info("Generating additional dummy APR data for new timestamps...")
# Create a temporary dataframe with only the latest real data
temp_df = global_df[global_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_df
# Generate dummy data for the new timestamps
new_dummy_data = generate_continuous_random_data(temp_df)
# Only keep APR data
if not new_dummy_data.empty:
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'APR']
logger.info(f"Generated {len(new_dummy_data)} additional dummy APR data points")
# Append the new dummy data to the existing dummy data
global_dummy_apr_df = pd.concat([global_dummy_apr_df, new_dummy_data], ignore_index=True)
else:
logger.info("No new timestamps in real data, using existing dummy APR data")
# Combine real and dummy APR data
if not global_dummy_apr_df.empty:
apr_dummy_count = len(global_dummy_apr_df)
global_df = pd.concat([global_df, global_dummy_apr_df], ignore_index=True)
logger.info(f"Added {apr_dummy_count} dummy APR data points to the dataset")
# Generate dummy ROI data only if needed
if not global_roi_df.empty:
# Check if we already have dummy data
if global_dummy_roi_df is None:
# First time - generate all dummy data
logger.info("Generating initial dummy ROI data...")
global_dummy_roi_df = generate_continuous_random_data(global_roi_df)
# Only keep ROI data
if not global_dummy_roi_df.empty:
global_dummy_roi_df = global_dummy_roi_df[global_dummy_roi_df['metric_type'] == 'ROI']
logger.info(f"Generated {len(global_dummy_roi_df)} initial dummy ROI data points")
else:
# We already have dummy data - check if we need to generate more
# Find the latest timestamp in the real data
latest_real_timestamp = global_roi_df['timestamp'].max()
# Find the latest timestamp in the dummy data
latest_dummy_timestamp = global_dummy_roi_df['timestamp'].max() if not global_dummy_roi_df.empty else None
# If the real data has newer timestamps, generate more dummy data
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
logger.info("Generating additional dummy ROI data for new timestamps...")
# Create a temporary dataframe with only the latest real data
temp_df = global_roi_df[global_roi_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_roi_df
# Generate dummy data for the new timestamps
new_dummy_data = generate_continuous_random_data(temp_df)
# Only keep ROI data
if not new_dummy_data.empty:
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'ROI']
logger.info(f"Generated {len(new_dummy_data)} additional dummy ROI data points")
# Append the new dummy data to the existing dummy data
global_dummy_roi_df = pd.concat([global_dummy_roi_df, new_dummy_data], ignore_index=True)
else:
logger.info("No new timestamps in real data, using existing dummy ROI data")
# Combine real and dummy ROI data
if not global_dummy_roi_df.empty:
roi_dummy_count = len(global_dummy_roi_df)
global_roi_df = pd.concat([global_roi_df, global_dummy_roi_df], ignore_index=True)
logger.info(f"Added {roi_dummy_count} dummy ROI data points to the dataset")
# Log the resulting dataframe
logger.info(f"Created DataFrame with {len(global_df)} rows (including dummy data)")
logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
# Log adjusted APR statistics if available
if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any():
logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}")
logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}")
# Log the difference between APR and adjusted APR
valid_rows = global_df[global_df['adjusted_apr'].notna()]
if not valid_rows.empty:
avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")
# All values are APR type (excluding zero and -100 values)
logger.info("All values are APR type (excluding zero and -100 values)")
logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}")
        # Log the entire dataframe for debugging; guard with isEnabledFor so we
        # do not pay for iterating every row when DEBUG logging is disabled
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Final DataFrame contents:")
            for idx, row in global_df.iterrows():
                logger.debug(f"Row {idx}: {row.to_dict()}")
        # Final step before returning: analyze adjusted_apr availability
logger.info("Analyzing adjusted_apr data availability...")
log_adjusted_apr_availability(global_df)
return global_df, global_roi_df
except requests.exceptions.RequestException as e:
logger.error(f"API request error: {e}")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
except Exception as e:
logger.error(f"Error fetching APR data: {e}")
logger.exception("Exception traceback:")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
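# Note: fetch_apr_data_from_db always returns an (apr_df, roi_df) tuple, even on
# its error paths, so callers can safely unpack two values.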
def log_adjusted_apr_availability(df):
"""
Analyzes and logs detailed information about adjusted_apr data availability.
Args:
df: DataFrame containing the APR data with adjusted_apr column
"""
if df.empty or 'adjusted_apr' not in df.columns:
logger.warning("No adjusted_apr data available for analysis")
return
# Get only rows with valid adjusted_apr values
has_adjusted = df[df['adjusted_apr'].notna()]
if has_adjusted.empty:
logger.warning("No valid adjusted_apr values found in the dataset")
return
# 1. When did adjusted_apr data start?
first_adjusted = has_adjusted['timestamp'].min()
last_adjusted = has_adjusted['timestamp'].max()
logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")
# Calculate coverage percentage
total_records = len(df)
records_with_adjusted = len(has_adjusted)
coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")
# 2. How many agents are providing adjusted_apr?
agents_with_adjusted = has_adjusted['agent_id'].unique()
logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")
# 3. May 10th cutoff analysis
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = df[df['timestamp'] < may_10_2025]
after_cutoff = df[df['timestamp'] >= may_10_2025]
if not before_cutoff.empty and not after_cutoff.empty:
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / len(before_cutoff)) * 100
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / len(after_cutoff)) * 100
logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")
# Check which agents had data before and after
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
if missing_after:
logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
new_after = agents_after - agents_before
if new_after:
logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")
# 4. Find date ranges for missing adjusted_apr
# Group by agent to analyze per-agent data availability
logger.info("=== DETAILED AGENT ANALYSIS ===")
for agent_id in df['agent_id'].unique():
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
# Get the valid adjusted_apr values for this agent
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if agent_adjusted.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
continue
# Get the date range for this agent's data
agent_start = agent_data['timestamp'].min()
agent_end = agent_data['timestamp'].max()
# Get the date range for adjusted_apr data
adjusted_start = agent_adjusted['timestamp'].min()
adjusted_end = agent_adjusted['timestamp'].max()
total_agent_records = len(agent_data)
agent_with_adjusted = len(agent_adjusted)
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")
# Calculate if this agent had data before/after May 10th
if not before_cutoff.empty and not after_cutoff.empty:
agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
if has_before and not has_after:
last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
elif not has_before and has_after:
first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")
# Check for gaps in adjusted_apr (periods of 24+ hours without data)
if len(agent_adjusted) < 2:
continue
# Sort by timestamp
sorted_data = agent_adjusted.sort_values('timestamp')
# Calculate time differences between consecutive data points
time_diffs = sorted_data['timestamp'].diff()
# Find gaps larger than 24 hours
gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]
if not gaps.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data")
# Log the gaps
for i, row in gaps.iterrows():
# Find the previous timestamp before the gap
prev_idx = sorted_data.index.get_loc(i) - 1
prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None
if prev_time:
gap_start = prev_time
gap_end = row['timestamp']
gap_duration = gap_end - gap_start
logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
def generate_apr_visualizations():
"""Generate APR visualizations with CSV-first approach for Hugging Face Space deployment"""
    global global_df
    global global_roi_df  # reassigned below when corrected ROI values are extracted
# CSV-FIRST APPROACH: Try to load from CSV first
logger.info("Attempting to load APR data from CSV files...")
df, csv_file = load_apr_data_from_csv()
if not df.empty:
logger.info(f"Successfully loaded APR data from CSV: {len(df)} records")
global_df = df
# Create visualizations using CSV data
logger.info("Creating APR visualizations from CSV data...")
combined_fig = create_combined_time_series_graph(df)
return combined_fig, csv_file
# FALLBACK: If CSV not available, try API
logger.info("CSV data not available, falling back to API...")
try:
df, _ = fetch_apr_data_from_db()
# If we got no data at all, return placeholder figures
if df.empty:
logger.info("No APR data available from API either. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No APR data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("optimus_apr_combined_graph.html")
fig.write_image("optimus_apr_combined_graph.png")
csv_file = None
return fig, csv_file
# Apply preprocessing to fix APR and ROI values
logger.info("Applying preprocessing to fix APR and ROI values...")
df = fix_apr_and_roi(df) # Apply preprocessing
global_df = df
# IMPORTANT: Also fix the ROI DataFrame with corrected values
logger.info("Extracting corrected ROI values from fixed APR data...")
if not df.empty and 'roi' in df.columns:
# Create corrected ROI DataFrame from the fixed APR data
corrected_roi_data = []
for idx, row in df.iterrows():
if not row['is_dummy'] and pd.notna(row['roi']):
roi_entry = {
"roi": row["roi"], # This is now the corrected ROI value
"timestamp": row["timestamp"],
"agent_id": row["agent_id"],
"agent_name": row["agent_name"],
"is_dummy": False,
"metric_type": "ROI"
}
corrected_roi_data.append(roi_entry)
# Replace the original ROI DataFrame with corrected values
if corrected_roi_data:
corrected_roi_df = pd.DataFrame(corrected_roi_data)
# Combine with dummy ROI data if it exists
if global_roi_df is not None and not global_roi_df.empty:
dummy_roi_data = global_roi_df[global_roi_df['is_dummy'] == True]
if not dummy_roi_data.empty:
global_roi_df = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
else:
global_roi_df = corrected_roi_df
else:
global_roi_df = corrected_roi_df
logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
else:
logger.warning("No corrected ROI values found to update ROI DataFrame")
# Save preprocessed data to CSV before creating visualizations
logger.info("Saving preprocessed APR data to CSV...")
csv_file = save_to_csv(df)
# Create visualizations using the saved CSV data
logger.info("Creating APR visualizations from preprocessed data...")
combined_fig = create_combined_time_series_graph(df)
return combined_fig, csv_file
except Exception as e:
logger.error(f"Error fetching APR data from API: {e}")
# Return error visualization
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error loading data: {str(e)}",
font=dict(size=16, color="red"),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
return fig, None
def generate_roi_visualizations():
"""Generate ROI visualizations with CSV-first approach for Hugging Face Space deployment"""
global global_roi_df
# FIXED APPROACH: Use corrected ROI data from APR CSV instead of problematic ROI CSV
logger.info("Loading corrected ROI data from APR CSV files...")
df_apr, csv_file = load_apr_data_from_csv()
if not df_apr.empty and 'roi' in df_apr.columns:
# Extract ROI data from the APR CSV (which contains corrected values)
roi_data = []
for idx, row in df_apr.iterrows():
if pd.notna(row['roi']):
roi_entry = {
"roi": row["roi"], # Use corrected ROI from APR data
"timestamp": row["timestamp"],
"agent_id": row["agent_id"],
"agent_name": row["agent_name"],
"is_dummy": row["is_dummy"],
"metric_type": "ROI",
"apr": row.get("apr"),
"adjusted_apr": row.get("adjusted_apr")
}
roi_data.append(roi_entry)
if roi_data:
df_roi = pd.DataFrame(roi_data)
logger.info(f"Successfully extracted {len(df_roi)} corrected ROI records from APR CSV")
global_roi_df = df_roi
# Create visualizations using corrected ROI data
logger.info("Creating ROI visualizations from corrected APR CSV data...")
combined_fig = create_combined_roi_time_series_graph(df_roi)
return combined_fig, "optimus_apr_values.csv" # Source is APR CSV
else:
logger.warning("No ROI data found in APR CSV")
else:
logger.warning("APR CSV not available or missing ROI column")
# FALLBACK: If CSV not available, try API
logger.info("CSV data not available, falling back to API...")
try:
# Fetch data from database if not already fetched
if global_roi_df is None or global_roi_df.empty:
_, df_roi = fetch_apr_data_from_db()
else:
df_roi = global_roi_df
# If we got no data at all, return placeholder figures
if df_roi.empty:
logger.info("No ROI data available from API either. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No ROI data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("optimus_roi_graph.html")
fig.write_image("optimus_roi_graph.png")
csv_file = None
return fig, csv_file
# Set global_roi_df for access by other functions
global_roi_df = df_roi
# IMPORTANT: Apply ROI corrections if we have fresh API data
logger.info("Checking if ROI data needs correction...")
if not df_roi.empty:
# Check if this ROI data contains uncorrected values (from API)
uncorrected_roi = df_roi[df_roi['is_dummy'] == False]
if not uncorrected_roi.empty:
logger.info("ROI data contains uncorrected values, applying corrections...")
# We need to get the corrected APR data to extract corrected ROI values
if global_df is not None and not global_df.empty:
# Extract corrected ROI values from the fixed APR data
corrected_roi_data = []
for idx, row in global_df.iterrows():
if not row['is_dummy'] and pd.notna(row['roi']):
roi_entry = {
"roi": row["roi"], # This is the corrected ROI value
"timestamp": row["timestamp"],
"agent_id": row["agent_id"],
"agent_name": row["agent_name"],
"is_dummy": False,
"metric_type": "ROI"
}
corrected_roi_data.append(roi_entry)
if corrected_roi_data:
corrected_roi_df = pd.DataFrame(corrected_roi_data)
# Combine with dummy ROI data if it exists
dummy_roi_data = df_roi[df_roi['is_dummy'] == True]
if not dummy_roi_data.empty:
df_roi = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
else:
df_roi = corrected_roi_df
global_roi_df = df_roi
logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
else:
logger.warning("No corrected ROI values found in APR data")
else:
logger.warning("No corrected APR data available to extract ROI values from")
else:
logger.info("ROI data contains only dummy values, no correction needed")
# Save preprocessed ROI data to CSV before creating visualizations
logger.info("Saving preprocessed ROI data to CSV...")
csv_file = save_roi_to_csv(df_roi)
# Create visualizations using the saved CSV data
logger.info("Creating ROI visualizations from preprocessed data...")
combined_fig = create_combined_roi_time_series_graph(df_roi)
return combined_fig, csv_file
except Exception as e:
logger.error(f"Error fetching ROI data from API: {e}")
# Return error visualization
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error loading data: {str(e)}",
font=dict(size=16, color="red"),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
return fig, None
def aggregate_daily_data(df, metric_column):
"""
Aggregate data by date and agent, taking the median of values within each day.
Args:
df: DataFrame with timestamp, agent_id, and metric data
metric_column: Name of the metric column ('apr' or 'roi')
Returns:
DataFrame with daily aggregated data per agent
"""
if df.empty:
return df
# Convert timestamp to date only (ignore time)
df = df.copy()
df['date'] = df['timestamp'].dt.date
# NEW: Add detailed logging to verify median calculation
logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
# Find days with multiple data points per agent to show the difference
sample_groups = df.groupby(['date', 'agent_id']).size()
multi_point_days = sample_groups[sample_groups > 1].head(10) # Show up to 10 examples
logger.info(f"Found {len(multi_point_days)} agent-days with multiple data points (showing up to 10):")
mean_median_differences = []
for (date, agent_id), count in multi_point_days.items():
day_data = df[(df['date'] == date) & (df['agent_id'] == agent_id)]
values = day_data[metric_column].tolist()
calculated_mean = day_data[metric_column].mean()
calculated_median = day_data[metric_column].median()
agent_name = day_data['agent_name'].iloc[0] if not day_data.empty else f"Agent {agent_id}"
difference = abs(calculated_mean - calculated_median)
mean_median_differences.append(difference)
logger.info(f" {agent_name} on {date}: {count} values = {values}")
logger.info(f" MEAN: {calculated_mean:.4f}, MEDIAN: {calculated_median:.4f}, DIFF: {difference:.4f}")
# Summary statistics
if mean_median_differences:
avg_difference = sum(mean_median_differences) / len(mean_median_differences)
max_difference = max(mean_median_differences)
logger.info(f"Mean vs Median differences - Avg: {avg_difference:.4f}, Max: {max_difference:.4f}")
else:
logger.info("No days found with multiple data points per agent")
# Show total distribution of data points per day
single_point_days = len(sample_groups[sample_groups == 1])
multi_point_days_count = len(sample_groups[sample_groups > 1])
logger.info(f"Data distribution: {single_point_days} agent-days with 1 point, {multi_point_days_count} agent-days with multiple points")
# Group by date and agent, calculate median for each day
daily_agent_data = df.groupby(['date', 'agent_id']).agg({
        metric_column: 'median',  # median, per the docstring and the debug logging above (previously 'mean', which contradicted both)
'agent_name': 'first',
'is_dummy': 'first',
'metric_type': 'first'
}).reset_index()
# Convert date back to datetime for plotting
daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])
# Log a few sample median values from the result
logger.info(f"Sample calculated median values:")
for i, row in daily_agent_data.head(5).iterrows():
logger.info(f" {row['agent_name']} on {row['date']}: median {metric_column} = {row[metric_column]:.4f}")
logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column} using MEDIAN")
return daily_agent_data
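# Example of the aggregation semantics: if an agent reports [1.0, 1.2, 9.0] on
# the same day, that day collapses to 1.2 (the median), so a single outlier
# reading no longer dominates the agent's daily value.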
def calculate_daily_medians(daily_agent_data, metric_column):
"""
Calculate daily medians across all agents for each date.
Args:
daily_agent_data: DataFrame with daily aggregated data per agent
metric_column: Name of the metric column ('apr' or 'roi')
Returns:
DataFrame with daily median values
"""
if daily_agent_data.empty:
return daily_agent_data
# For each date, calculate median across all agents (excluding missing data)
daily_medians = daily_agent_data.groupby('date').agg({
metric_column: 'median'
}).reset_index()
# Convert date back to datetime for plotting
daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])
logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}")
return daily_medians
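# Note: groupby('date').median() only sees agents that actually reported on a
# given date, so the set of agents behind each daily median can vary from day
# to day (the "excluding missing data" behaviour noted above).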
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
"""
Calculate moving average of daily medians using a specified time window.
Args:
daily_medians: DataFrame with daily median values
metric_column: Name of the metric column ('apr' or 'roi')
window_days: Number of days for the moving average window
Returns:
DataFrame with moving average values added
"""
if daily_medians.empty:
return daily_medians
# Sort by timestamp
daily_medians = daily_medians.sort_values('timestamp').copy()
# Initialize moving average column
daily_medians['moving_avg'] = None
# Define the time window
time_window = pd.Timedelta(days=window_days)
logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")
# Calculate moving averages for each timestamp
for i, row in daily_medians.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all median values within the time window
window_data = daily_medians[
(daily_medians['timestamp'] >= window_start) &
(daily_medians['timestamp'] <= current_time)
]
# Calculate the average of medians for the time window
if not window_data.empty:
daily_medians.at[i, 'moving_avg'] = window_data[metric_column].mean()
else:
# If no data points in the window, use the current value
daily_medians.at[i, 'moving_avg'] = row[metric_column]
logger.info(f"Calculated {window_days}-day moving averages with {len(daily_medians)} points")
return daily_medians
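# A vectorized alternative (a sketch, assuming 'timestamp' is datetime-typed)
# would be pandas' time-based rolling window, e.g.:
#   daily_medians['moving_avg'] = (
#       daily_medians.set_index('timestamp')[metric_column]
#       .rolling(f'{window_days}D').mean().values
#   )
# The explicit loop above behaves similarly (its window is closed on both ends,
# whereas rolling's is left-open) and is kept for clarity.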
def create_combined_roi_time_series_graph(df):
"""Create a time series graph showing daily median ROI values with 7-day moving average"""
if len(df) == 0:
logger.error("No data to plot combined ROI graph")
fig = go.Figure()
fig.add_annotation(
text="No ROI data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# Calculate runtime for each agent from their actual first data point
logger.info(f"Calculating runtime for each agent from their actual start date")
agent_runtimes = {}
for agent_id in df['agent_id'].unique():
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
first_report = agent_data['timestamp'].min() # Agent's actual start date
last_report = agent_data['timestamp'].max() # Agent's last report
runtime_days = (last_report - first_report).total_seconds() / (24 * 3600) # Convert to days
agent_runtimes[agent_id] = {
'agent_name': agent_name,
'first_report': first_report,
'last_report': last_report,
'runtime_days': runtime_days
}
# Calculate average runtime
avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0
logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days")
# Log individual agent runtimes for debugging
for agent_id, data in agent_runtimes.items():
logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
# IMPORTANT: Clean and convert ROI data to ensure consistency
logger.info("Cleaning ROI data before conversion...")
def clean_roi_value(value):
"""Clean and convert ROI value to float"""
if pd.isna(value):
return None
# If it's already a number, return it
if isinstance(value, (int, float)):
return float(value)
# If it's a string, try to extract numeric value
if isinstance(value, str):
# Remove any non-numeric characters except decimal point and minus sign
import re
# Look for patterns like "value': 16.007665648354" and extract the number
match = re.search(r'[\d\.-]+', value)
if match:
try:
return float(match.group())
except ValueError:
logger.warning(f"Could not convert ROI value to float: {value}")
return None
else:
logger.warning(f"No numeric value found in ROI string: {value}")
return None
logger.warning(f"Unexpected ROI value type: {type(value)} - {value}")
return None
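    # Examples of inputs this cleaner handles (based on the regex above):
    #   16.0076              -> 16.0076
    #   "value': 16.0076"    -> 16.0076   (stringified dict fragment)
    #   "n/a"                -> None      (no numeric content; warning logged)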
# Apply cleaning function to ROI column
df['roi'] = df['roi'].apply(clean_roi_value)
# Remove rows with invalid ROI values
initial_count = len(df)
df = df[df['roi'].notna()]
final_count = len(df)
removed_count = initial_count - final_count
if removed_count > 0:
logger.warning(f"Removed {removed_count} rows with invalid ROI values")
# Ensure ROI is float after cleaning
df['roi'] = df['roi'].astype(float)
# ROI values are already in percentage format from initial_value_fixer.py
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use the actual start date from the data instead of a fixed date
x_start_date = min_time
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_roi_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported ROI graph data to {debug_csv} for debugging")
# Create Plotly figure in a clean state
fig = go.Figure()
# Add background shapes for positive and negative regions
# Add shape for positive ROI region (above zero) - use reasonable fixed range
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=10, # Fixed positive range to avoid extreme outliers affecting the view
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative ROI region (below zero) - use reasonable fixed range
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=-10, y1=0, # Fixed negative range to avoid extreme outliers affecting the view
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Filter ROI outliers for better visualization (±200% range)
before_outlier_filter = len(df)
df = df[(df['roi'] <= 200) & (df['roi'] >= -200)]
after_outlier_filter = len(df)
excluded_by_outlier = before_outlier_filter - after_outlier_filter
logger.info(f"ROI outlier filtering: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded)")
# IMPORTANT: Filter data by hardcoded date range (June 6 to June 25, 2025)
min_date = datetime(2025, 6, 6)
max_date = datetime(2025, 6, 25, 23, 59, 59) # Include all of June 25th
logger.info(f"Filtering ROI data to date range: {min_date} to {max_date}")
# Count data points before filtering
before_filter_count = len(df)
# Apply date filter
df = df[(df['timestamp'] >= min_date) & (df['timestamp'] <= max_date)]
# Count data points after filtering
after_filter_count = len(df)
excluded_by_date = before_filter_count - after_filter_count
logger.info(f"ROI Date filtering: {before_filter_count} -> {after_filter_count} data points ({excluded_by_date} excluded)")
# NEW APPROACH: Daily aggregation and median calculation
    # Step 1: Aggregate data daily per agent (median of values within each day)
daily_agent_data = aggregate_daily_data(df, 'roi')
# Step 2: Calculate daily medians across all agents
daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
# Step 3: Calculate 7-day moving average of daily medians
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
# Find the last date where we have valid moving average data
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
# If we don't have any valid moving average data, use the max time from the original data
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent daily data points with agent names in hover, but limit display for scalability
if not daily_agent_data.empty:
# Group by agent to use different colors for each agent
unique_agents = daily_agent_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
        # Log the most active agents for reference; all per-agent traces are
        # hidden by default and can be toggled on from the legend
        agent_counts = daily_agent_data['agent_name'].value_counts()
        MAX_VISIBLE_AGENTS = 5
        top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
        logger.info(f"Top {len(top_agents)} most active agents out of {len(unique_agents)} total (all hidden by default)")
        # Add daily aggregated data points for each agent (hidden by default)
for agent_name in unique_agents:
agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['roi'].tolist()
            # Hide every per-agent trace by default; users can enable them from the legend
            is_visible = False
# Add data points as markers for ROI
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (Daily ROI)',
hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add ROI 7-day moving average of daily medians as a smooth line
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
# Create hover template for the ROI moving average line
hover_data_roi = []
for idx, row in daily_medians_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
hover_data_roi.append(
f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='blue', width=3, shape='spline', smoothing=1.3), # Smooth curved line like APR
name='Median ROI (7d window)',
hovertext=hover_data_roi,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
# Update layout with average runtime information in the title
fig.update_layout(
title=dict(
text=f"Optimus Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
autosize=True, # Enable auto-sizing for responsiveness
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem",
            font=dict(
                family="Arial, sans-serif",
                size=14,
                color="black",
                weight="bold"
            )
        ),
margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
hovermode="closest"
)
# Add single annotation for y-axis
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
        y=0,  # Anchored at ROI = 0 in data coordinates (yref="y")
xref="paper",
yref="y",
text="ROI [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Update y-axis with autoscaling for ROI to fit the actual data range
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
autorange=True, # Enable autoscaling to fit the actual data range
tickformat=".1f", # Format tick labels with 1 decimal place for better precision
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and hardcoded date range (June 6 to June 25)
min_date = datetime(2025, 6, 6) # Hardcoded start date: June 6, 2025
max_date = datetime(2025, 6, 25) # Hardcoded end date: June 25, 2025
logger.info(f"ROI Graph - Hardcoded date range: min_date = {min_date}, max_date = {max_date}")
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
        # Set hardcoded range from June 6 to June 25, 2025
range=[min_date, max_date],
autorange=False, # Explicitly disable autoscale
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
try:
# Save the figure
graph_file = "optimus_roi_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "optimus_roi_graph.png"
try:
fig.write_image(img_file)
logger.info(f"ROI graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving ROI image: {e}")
logger.info(f"ROI graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced ROI graph: {e}")
logger.info("Falling back to Simpler ROI graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Add background shapes with fixed reasonable ranges
simple_fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=10, # Fixed positive range to avoid extreme outliers affecting the view
x0=min_time, x1=max_time,
layer="below"
)
simple_fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=-10, y1=0, # Fixed negative range to avoid extreme outliers affecting the view
x0=min_time, x1=max_time,
layer="below"
)
# Simply plot the average ROI data with moving average.
# NOTE: this assumes avg_roi_data / avg_roi_data_with_ma (and min_time / max_time above)
# were computed earlier in this function; the guard checks avg_roi_data while the trace
# below plots avg_roi_data_with_ma.
if not avg_roi_data.empty:
# Add moving average as a line
simple_fig.add_trace(
go.Scatter(
x=avg_roi_data_with_ma['timestamp'],
y=avg_roi_data_with_ma['moving_avg'],
mode='lines',
name='Average ROI (3d window)',
line=dict(width=2, color='blue') # Thinner line
)
)
# Simplified layout with adjusted y-axis range
simple_fig.update_layout(
title=dict(
text="Optimus Agents ROI",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None,
yaxis_title=None,
template="plotly_white",
height=600,
autosize=True,
margin=dict(r=30, l=120, t=40, b=50)
)
# Update y-axis with fixed range for ROI (-10 to 10)
simple_fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
range=[-10, 10], # Set fixed range from -10 to 10
tickformat=".2f",
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and autoscaling
simple_fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
autorange=True, # Enable autoscaling
tickformat="%b %d",
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
)
# Save the figure
graph_file = "optimus_roi_graph.html"
simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Return the simple figure
return simple_fig
def save_roi_to_csv(df):
"""Save the ROI data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No ROI data to save to CSV")
return None
# Define the CSV file path
csv_file = "optimus_roi_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"ROI data saved to {csv_file}")
return csv_file
def create_time_series_graph_per_agent(df):
"""Create a time series graph for each agent using Plotly"""
# Get unique agents
unique_agents = df['agent_id'].unique()
if len(unique_agents) == 0:
logger.error("No agent data to plot")
fig = go.Figure()
fig.add_annotation(
text="No agent data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# Create a subplot figure for each agent
fig = make_subplots(rows=len(unique_agents), cols=1,
subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}"
for agent_id in unique_agents],
vertical_spacing=0.1)
# Plot data for each agent
for i, agent_id in enumerate(unique_agents):
agent_data = df[df['agent_id'] == agent_id].copy()
agent_name = agent_data['agent_name'].iloc[0]
row = i + 1
# Add zero line to separate APR and Performance
fig.add_shape(
type="line", line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1
)
# Add background colors with dynamic values
fig.add_shape(
type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0),
y0=0, y1=agent_data['apr'].max() * 1.1 if not agent_data.empty else 10,
x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
fig.add_shape(
type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0),
y0=agent_data['apr'].min() * 1.1 if not agent_data.empty else -10, y1=0,
x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
# Create separate dataframes for different data types
apr_data = agent_data[agent_data['metric_type'] == 'APR']
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
# Sort all data by timestamp for the line plots
combined_agent_data = agent_data.sort_values('timestamp')
# Add main line connecting all points
fig.add_trace(
go.Scatter(
x=combined_agent_data['timestamp'],
y=combined_agent_data['apr'],
mode='lines',
line=dict(color='purple', width=2),
name=f'{agent_name}',
legendgroup=agent_name,
showlegend=(i == 0), # Only show in legend once
hovertemplate='Time: %{x}<br>Value: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for APR values
if not apr_data.empty:
fig.add_trace(
go.Scatter(
x=apr_data['timestamp'],
y=apr_data['apr'],
mode='markers',
marker=dict(color='blue', size=10, symbol='circle'),
name='APR',
legendgroup='APR',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for Performance values
if not perf_data.empty:
fig.add_trace(
go.Scatter(
x=perf_data['timestamp'],
y=perf_data['apr'],
mode='markers',
marker=dict(color='red', size=10, symbol='square'),
name='Performance',
legendgroup='Performance',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>Performance: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Update axes
fig.update_xaxes(title_text="Time", row=row, col=1)
fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)')
# Update layout
fig.update_layout(
height=400 * len(unique_agents),
width=1000,
title_text="APR and Performance Values per Agent",
template="plotly_white",
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
),
margin=dict(r=20, l=20, t=30, b=20),
hovermode="closest"
)
# Save the figure (still useful for reference)
graph_file = "optimus_apr_per_agent_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility (write_image requires the kaleido package)
img_file = "optimus_apr_per_agent_graph.png"
try:
fig.write_image(img_file)
logger.info(f"Per-agent graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving per-agent image: {e}")
logger.info(f"Per-agent graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
def write_debug_info(df, fig):
"""Minimal debug info function"""
try:
# Just log minimal information
logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces")
return True
except Exception as e:
logger.error(f"Error writing debug info: {e}")
return False
def create_combined_time_series_graph(df):
"""Create a time series graph showing average APR values across all agents"""
if len(df) == 0:
logger.error("No data to plot combined graph")
fig = go.Figure()
fig.add_annotation(
text="No data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# IMPORTANT: Force data types to ensure consistency
df['apr'] = df['apr'].astype(float) # Ensure APR is float
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use the actual start date from the data instead of a fixed date
x_start_date = min_time
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info("Graph data - all positive APR values only")
logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_graph_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported graph data to {debug_csv} for debugging")
# Write detailed data report
with open("debug_graph_data_report.txt", "w") as f:
f.write("==== GRAPH DATA REPORT ====\n\n")
f.write(f"Total data points: {len(df)}\n")
f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n")
# Output per-agent details
unique_agents = df['agent_id'].unique()
f.write(f"Number of agents: {len(unique_agents)}\n\n")
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n")
f.write(f" Total data points: {len(agent_data)}\n")
apr_data = agent_data[agent_data['metric_type'] == 'APR']
f.write(f" APR data points: {len(apr_data)}\n")
if not apr_data.empty:
f.write(f" APR values: {apr_data['apr'].tolist()}\n")
f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n")
f.write("\n")
logger.info("Generated detailed graph data report")
# ENSURE THERE ARE NO CONFLICTING AXES OR TRACES
# Create Plotly figure in a clean state
fig = go.Figure()
# Enable autoscaling instead of fixed ranges
logger.info("Using autoscaling for axes ranges")
# Add background shapes for APR and Performance regions
# (min_time and max_time were already computed above)
# Filter for APR data up front so the dynamic shape bounds below use the full dataset
# rather than the apr_data variable leaked from the per-agent debug-report loop above
apr_data = df[df['metric_type'] == 'APR'].copy()
# Add shape for positive APR region (above zero)
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=apr_data['apr'].max() * 1.1 if not apr_data.empty else 10, # Dynamic positive value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative APR region (below zero)
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=apr_data['apr'].min() * 1.1 if not apr_data.empty else -10, y1=0, # Dynamic negative value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Calculate daily median APR values across all agents (apr_data was filtered above)
# Filter APR outliers (±200% range)
before_outlier_filter = len(apr_data)
apr_data = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)]
after_outlier_filter = len(apr_data)
excluded_by_outlier = before_outlier_filter - after_outlier_filter
logger.info(f"APR outlier filtering: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded)")
# IMPORTANT: Filter data by hardcoded date range (June 6 to June 25, 2025)
min_date = datetime(2025, 6, 6)
max_date = datetime(2025, 6, 25, 23, 59, 59) # Include all of June 25th
logger.info(f"Filtering APR data to date range: {min_date} to {max_date}")
# Count data points before filtering
before_filter_count = len(apr_data)
# Apply date filter
apr_data = apr_data[(apr_data['timestamp'] >= min_date) & (apr_data['timestamp'] <= max_date)]
# Count data points after filtering
after_filter_count = len(apr_data)
excluded_by_date = before_filter_count - after_filter_count
logger.info(f"Date filtering: {before_filter_count} -> {after_filter_count} data points ({excluded_by_date} excluded)")
# NEW APPROACH: Daily aggregation and median calculation for APR
# Step 1: Aggregate data daily per agent (mean of values within each day)
daily_agent_data = aggregate_daily_data(apr_data, 'apr')
# Step 2: Calculate daily medians across all agents
daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
# Step 3: Calculate 7-day moving average of daily medians
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
# Also handle adjusted APR if it exists
daily_medians_adjusted = None
daily_medians_adjusted_with_ma = None
if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
# Create a separate dataset for adjusted APR
apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
if not apr_data_with_adjusted.empty:
# Step 1: Aggregate adjusted APR data daily per agent
daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
# Step 2: Calculate daily medians for adjusted APR
daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
# Step 3: Calculate 7-day moving average of daily medians for adjusted APR
daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
if daily_medians_adjusted is not None:
logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
# The old rolling-window moving average calculation was removed; the daily median approach above replaces it.
# Find the last date where we have valid moving average data
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
# Find the last date where we have valid adjusted moving average data
last_valid_adj_ma_date = None
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
# Determine the last valid date for either moving average
last_valid_date = last_valid_ma_date
if last_valid_adj_ma_date is not None:
last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date
# If we don't have any valid moving average data, use the max time from the original data
if last_valid_date is None:
last_valid_date = df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent data points with agent names in hover, but limit display for scalability
if not apr_data.empty:
# Group by agent to use different colors for each agent
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent to determine which are most active
agent_counts = apr_data['agent_name'].value_counts()
# Identify the most active agents (kept for reference; all agent traces are hidden by default below)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
logger.info(f"Identified top {len(top_agents)} most active agents out of {len(unique_agents)} total (all agent traces hidden by default)")
# Add data points for each agent, but only make top agents visible by default
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['apr'].tolist()
# Change default visibility to False to hide all agent data points
is_visible = False
# Add data points as markers for APR
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (APR)',
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add data points for adjusted APR if it exists
if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any():
x_values_adj = agent_data['timestamp'].tolist()
y_values_adj = agent_data['adjusted_apr'].tolist()
fig.add_trace(
go.Scatter(
x=x_values_adj,
y=y_values_adj,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='diamond', # Different symbol for adjusted APR
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (Adjusted APR)',
hovertemplate='Time: %{x}<br>Adjusted APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
# Add APR 7-day moving average of daily medians as a smooth line
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
# Create hover template for the APR moving average line
hover_data_apr = []
for idx, row in daily_medians_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
hover_data_apr.append(
f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line
name='Median APR (7d window)',
hovertext=hover_data_apr,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
# Add adjusted APR 7-day moving average line if it exists
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
# Create hover template for the adjusted APR moving average line
hover_data_adj = []
for idx, row in daily_medians_adjusted_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
hover_data_adj.append(
f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_adj_ma,
y=y_values_adj_ma,
mode='lines', # Only lines for moving average
line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line
name='Median Adjusted APR (7d window)',
hovertext=hover_data_adj,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
else:
logger.warning("No adjusted APR moving average data available to plot")
# Removed cumulative APR as requested
logger.info("Cumulative APR graph line has been removed as requested")
# Update layout - use simple boolean values everywhere
# Make chart responsive instead of fixed width
fig.update_layout(
title=dict(
text="Optimus Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem"
),
margin=dict(r=30, l=120, t=40, b=50), # Extra bottom margin for the angled date tick labels
hovermode="closest"
)
# Add annotations for y-axis regions
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-25, # Middle of the negative region
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=50, # Middle of the positive region
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# x-axis title annotation intentionally omitted (the date tick labels are self-explanatory)
# Update layout for legend
fig.update_layout(
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem",
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Update y-axis with autoscaling
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use autoscaling instead of fixed range
autorange=True, # Enable autoscaling to fit the data
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and hardcoded date range (June 6 to June 25)
min_date = datetime(2025, 6, 6) # Hardcoded start date: June 6, 2025
max_date = datetime(2025, 6, 25) # Hardcoded end date: June 25, 2025
logger.info(f"APR Graph - Hardcoded date range: min_date = {min_date}, max_date = {max_date}")
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Set hardcoded range from June 6 to June 25, 2025
range=[min_date, max_date],
autorange=False, # Explicitly disable autoscale
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# Save and return the advanced figure; if anything fails, fall back to a simpler,
# more reliable plot built in the except branch below
try:
# Write detailed debug information before saving the figure
write_debug_info(df, fig)
# Save the figure (still useful for reference)
graph_file = "optimus_apr_combined_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "optimus_apr_combined_graph.png"
try:
fig.write_image(img_file)
logger.info(f"Combined graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving image: {e}")
logger.info(f"Combined graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced graph: {e}")
logger.info("Falling back to Simpler graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Define colors for the fallback graph
fallback_colors = px.colors.qualitative.Plotly
# Simply plot the average APR data with moving average.
# avg_apr_data is not defined earlier in this function, so derive it here as the
# per-timestamp mean APR across agents to avoid a NameError in the fallback path.
avg_apr_data = apr_data.groupby('timestamp', as_index=False)['apr'].mean()
if not avg_apr_data.empty:
# Sort by timestamp
avg_apr_data = avg_apr_data.sort_values('timestamp')
# Calculate both moving averages for the fallback graph
avg_apr_data_with_ma = avg_apr_data.copy()
avg_apr_data_with_ma['moving_avg'] = None # 6-hour window
avg_apr_data_with_ma['infinite_avg'] = None # Infinite window
# Define the time window (6 hours)
time_window = pd.Timedelta(hours=6)
# Calculate the moving averages for each timestamp
for i, row in avg_apr_data_with_ma.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all data points within the 6-hour time window
window_data = apr_data[
(apr_data['timestamp'] >= window_start) &
(apr_data['timestamp'] <= current_time)
]
# Get all data points up to the current timestamp (infinite window)
infinite_window_data = apr_data[
apr_data['timestamp'] <= current_time
]
# Calculate the average APR for the 6-hour time window
if not window_data.empty:
avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
else:
# If no data points in the window, use the current value
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
# Calculate the average APR for the infinite window
if not infinite_window_data.empty:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean()
else:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr']
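# NOTE: the row-by-row loop above is O(n^2) in the number of points. A vectorized
# sketch of the same idea (assuming timestamps are sorted and datetime-typed, and
# ignoring duplicate-timestamp edge cases) would be:
#
# pts = apr_data.sort_values('timestamp').set_index('timestamp')['apr']
# rolled = pts.rolling('6h').mean()     # trailing 6-hour mean over all agents' points
# expanded = pts.expanding().mean()     # cumulative mean over all points so far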
# Add data points for each agent, but only make top agents visible by default
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent
agent_counts = apr_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Determine if this agent should be visible by default
is_visible = agent_name in top_agents
# Add data points as markers
simple_fig.add_trace(
go.Scatter(
x=agent_data['timestamp'],
y=agent_data['apr'],
mode='markers',
name=f'Agent: {agent_name}',
marker=dict(
size=10,
color=color_map[agent_name]
),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # Only top agents visible by default
)
)
# Add 6-hour moving average as a line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['moving_avg'],
mode='lines',
name='Average APR (6h window)',
line=dict(width=2, color='red') # Thinner line
)
)
# Add infinite window moving average as another line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['infinite_avg'],
mode='lines',
name='Cumulative Average APR (all data)',
line=dict(width=4, color='green') # Thicker solid line
)
)
# Simplified layout with fixed y-axis range (-10 to 10)
simple_fig.update_layout(
title=dict(
text="Optimus Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
yaxis=dict(
# Fixed range from -10 to 10
range=[-10, 10], # Set fixed range from -10 to 10
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=12) # Larger font for tick labels
),
height=600, # Reduced height for better fit
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
template="plotly_white", # Use a cleaner template
margin=dict(r=30, l=120, t=40, b=50) # Extra bottom margin for the angled date tick labels
)
# Add annotations for y-axis regions in the fallback graph
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-5, # Middle of the negative region (y-axis is fixed to [-10, 10])
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=5, # Middle of the positive region (y-axis is fixed to [-10, 10])
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# x-axis title annotation intentionally omitted (the date tick labels are self-explanatory)
# Update legend font for fallback graph
simple_fig.update_layout(
legend=dict(
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Apply autoscaling to the x-axis for the fallback graph
simple_fig.update_xaxes(
autorange=True, # Enable autoscaling
tickformat="%b %d", # Simplified date format without time
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# Update y-axis tick font for fallback graph
simple_fig.update_yaxes(
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size
)
# Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS
if len(unique_agents) > MAX_VISIBLE_AGENTS:
simple_fig.add_annotation(
text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. Toggle others in legend.",
xref="paper", yref="paper",
x=0.5, y=1.05,
showarrow=False,
font=dict(size=12, color="gray"),
align="center"
)
# Return the simple figure
return simple_fig
def save_to_csv(df):
"""Save the APR data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No APR data to save to CSV")
return None
# Define the CSV file path
csv_file = "optimus_apr_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"APR data saved to {csv_file}")
# Also generate a statistics CSV file
stats_df = generate_statistics_from_data(df)
stats_csv = "optimus_apr_statistics.csv"
stats_df.to_csv(stats_csv, index=False)
logger.info(f"Statistics saved to {stats_csv}")
# Log detailed statistics about adjusted APR
if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any():
adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()]
logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}")
for _, row in adjusted_stats.iterrows():
if row['agent_id'] != 'ALL': # Skip the overall stats row
logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}")
# Log overall adjusted APR stats
overall_row = stats_df[stats_df['agent_id'] == 'ALL']
if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]):
logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}")
return csv_file
def generate_statistics_from_data(df):
"""Generate statistics from the APR data"""
if df.empty:
return pd.DataFrame()
# Get unique agents
unique_agents = df['agent_id'].unique()
stats_list = []
# Generate per-agent statistics
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
# APR statistics
apr_data = agent_data[agent_data['metric_type'] == 'APR']
real_apr = apr_data[apr_data['is_dummy'] == False]
# Performance statistics
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
real_perf = perf_data[perf_data['is_dummy'] == False]
# Check if adjusted_apr exists and has non-null values
has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any()
stats = {
'agent_id': agent_id,
'agent_name': agent_name,
'total_points': len(agent_data),
'apr_points': len(apr_data),
'performance_points': len(perf_data),
'real_apr_points': len(real_apr),
'real_performance_points': len(real_perf),
'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None,
'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None,
'max_apr': apr_data['apr'].max() if not apr_data.empty else None,
'min_apr': apr_data['apr'].min() if not apr_data.empty else None,
'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None,
'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None,
'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None,
'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None
}
stats_list.append(stats)
# Generate overall statistics
apr_only = df[df['metric_type'] == 'APR']
perf_only = df[df['metric_type'] == 'Performance']
# Check if adjusted_apr exists and has non-null values for overall stats
has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns and apr_only['adjusted_apr'].notna().any()
overall_stats = {
'agent_id': 'ALL',
'agent_name': 'All Agents',
'total_points': len(df),
'apr_points': len(apr_only),
'performance_points': len(perf_only),
'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]),
'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]),
'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None,
'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None,
'max_apr': apr_only['apr'].max() if not apr_only.empty else None,
'min_apr': apr_only['apr'].min() if not apr_only.empty else None,
'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None,
'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None,
'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None,
'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None
}
stats_list.append(overall_stats)
return pd.DataFrame(stats_list)
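# Example (illustrative) of inspecting the statistics table produced above:
# stats = generate_statistics_from_data(global_df)
# print(stats[['agent_name', 'avg_apr', 'min_apr', 'max_apr']].to_string(index=False))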
# Create dummy functions for the commented out imports
def create_transcation_visualizations():
"""Dummy implementation that returns a placeholder graph (name keeps the original 'transcation' typo so it matches the commented-out import)"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
def create_active_agents_visualizations():
"""Dummy implementation that returns a placeholder graph"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
# Dummy blockchain functions to replace the commented ones
def get_transfers(integrator: str, wallet: str) -> Dict[str, Any]:
"""Dummy function that returns an empty result"""
return {"transfers": []}
def fetch_and_aggregate_transactions():
"""Dummy function that returns empty data"""
return [], {}
# Function to parse the transaction data and prepare it for visualization
def process_transactions_and_agents(data):
"""Dummy function that returns empty dataframes"""
df_transactions = pd.DataFrame()
df_agents = pd.DataFrame(columns=['date', 'agent_count'])
df_agents_weekly = pd.DataFrame()
return df_transactions, df_agents, df_agents_weekly
# Function to create visualizations based on the metrics
def create_visualizations():
# Placeholder figures for testing
fig_swaps_chain = go.Figure()
fig_swaps_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_bridges_chain = go.Figure()
fig_bridges_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_agents_registered = go.Figure()
fig_agents_registered.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_tvl = go.Figure()
fig_tvl.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl
# Dashboard function: builds the Gradio UI with responsive plot containers
def dashboard():
with gr.Blocks() as demo:
gr.Markdown("# Average Optimus Agent Performance")
# Create tabs for APR and ROI metrics
with gr.Tabs():
# APR Metrics tab
with gr.Tab("APR Metrics"):
with gr.Column():
refresh_apr_btn = gr.Button("Refresh APR Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="apr_toggle_container"):
with gr.Column(scale=1, min_width=150):
apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle")
with gr.Column(scale=1, min_width=150):
adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle")
# Add a text area for status messages
apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# ROI Metrics tab
with gr.Tab("ROI Metrics"):
with gr.Column():
refresh_roi_btn = gr.Button("Refresh ROI Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="roi_toggle_container"):
with gr.Column(scale=1, min_width=150):
roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle")
# Add a text area for status messages
roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# Add custom CSS for making the plots responsive
gr.HTML("""
<style>
/* Make plots responsive */
#responsive_apr_plot, #responsive_roi_plot {
width: 100% !important;
max-width: 100% !important;
}
#responsive_apr_plot > div, #responsive_roi_plot > div {
width: 100% !important;
height: auto !important;
min-height: 500px !important;
}
/* Toggle checkbox styling */
#apr_toggle .gr-checkbox {
accent-color: #e74c3c !important;
}
#adjusted_apr_toggle .gr-checkbox {
accent-color: #2ecc71 !important;
}
#roi_toggle .gr-checkbox {
accent-color: #3498db !important;
}
/* Make the toggle section more compact */
#apr_toggle_title, #roi_toggle_title {
margin-bottom: 0;
margin-top: 10px;
}
#apr_toggle_container, #roi_toggle_container {
margin-top: 5px;
}
/* Style the checkbox labels */
.gr-form.gr-box {
border: none !important;
background: transparent !important;
}
/* Make checkboxes and labels appear on the same line */
.gr-checkbox-container {
display: flex !important;
align-items: center !important;
}
/* Add colored indicators */
#apr_toggle .gr-checkbox-label::before {
content: "●";
color: #e74c3c;
margin-right: 5px;
}
#adjusted_apr_toggle .gr-checkbox-label::before {
content: "●";
color: #2ecc71;
margin-right: 5px;
}
#roi_toggle .gr-checkbox-label::before {
content: "●";
color: #3498db;
margin-right: 5px;
}
</style>
""")
# Function to update the APR graph
def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_apr_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Median APR (7d window)':
trace.visible = show_apr_ma
elif trace.name == 'Median Adjusted APR (7d window)': # Must match the trace name set in create_combined_time_series_graph
trace.visible = show_adjusted_apr_ma
return combined_fig
except Exception as e:
logger.exception("Error generating APR visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Function to update the ROI graph
def update_roi_graph(show_roi_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_roi_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Median ROI (7d window)':
trace.visible = show_roi_ma
return combined_fig
except Exception as e:
logger.exception("Error generating ROI visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Initialize the APR graph on load with a placeholder
apr_placeholder_fig = go.Figure()
apr_placeholder_fig.add_annotation(
text="Click 'Refresh APR Data' to load APR graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_apr_graph.value = apr_placeholder_fig
# Initialize the ROI graph on load with a placeholder
roi_placeholder_fig = go.Figure()
roi_placeholder_fig.add_annotation(
text="Click 'Refresh ROI Data' to load ROI graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_roi_graph.value = roi_placeholder_fig
# Function to update the APR graph based on toggle states
def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible):
return update_apr_graph(apr_visible, adjusted_apr_visible)
# Function to update the ROI graph based on toggle states
def update_roi_graph_with_toggles(roi_visible):
return update_roi_graph(roi_visible)
# Function to refresh APR data
def refresh_apr_data():
"""Refresh APR data from the database and update the visualization"""
try:
# Fetch new APR data
logger.info("Manually refreshing APR data...")
fetch_apr_data_from_db()
# Verify data was fetched successfully
if global_df is None or len(global_df) == 0:
logger.error("Failed to fetch APR data")
return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
# Log info about fetched data with focus on adjusted_apr
may_10_2025 = datetime(2025, 5, 10)
if 'timestamp' in global_df and 'adjusted_apr' in global_df:
after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Generate new visualization
logger.info("Generating new APR visualization...")
new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
return new_graph, "APR data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing APR data: {e}")
return combined_apr_graph.value, f"Error: {str(e)}"
# Function to refresh ROI data
def refresh_roi_data():
"""Refresh ROI data from the database and update the visualization"""
try:
# Fetch new ROI data
logger.info("Manually refreshing ROI data...")
fetch_apr_data_from_db() # This also fetches ROI data
# Verify data was fetched successfully
if global_roi_df is None or len(global_roi_df) == 0:
logger.error("Failed to fetch ROI data")
return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
# Generate new visualization
logger.info("Generating new ROI visualization...")
new_graph = update_roi_graph(roi_toggle.value)
return new_graph, "ROI data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing ROI data: {e}")
return combined_roi_graph.value, f"Error: {str(e)}"
# Set up the button click event for APR refresh
refresh_apr_btn.click(
fn=refresh_apr_data,
inputs=[],
outputs=[combined_apr_graph, apr_status_text]
)
# Set up the button click event for ROI refresh
refresh_roi_btn.click(
fn=refresh_roi_data,
inputs=[],
outputs=[combined_roi_graph, roi_status_text]
)
# Set up the toggle switch events for APR
apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
adjusted_apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
# Set up the toggle switch events for ROI
roi_toggle.change(
fn=update_roi_graph_with_toggles,
inputs=[roi_toggle],
outputs=[combined_roi_graph]
)
return demo
# Launch the dashboard when run as a script.
# NOTE: launch() blocks, so definitions below this guard (e.g. generate_adjusted_apr_report)
# are only created after the server exits; ideally this entry point would be the last
# statement in the file.
if __name__ == "__main__":
dashboard().launch()
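# Example (illustrative): launch() accepts the standard Gradio options if needed, e.g.
# dashboard().launch(server_name="0.0.0.0", server_port=7860)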
def generate_adjusted_apr_report():
"""
Generate a detailed report about adjusted_apr data availability and save it to a file.
Returns the path to the generated report file.
"""
global global_df
if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
logger.warning("No adjusted_apr data available for report generation")
return None
# Create a report file
report_path = "adjusted_apr_report.txt"
with open(report_path, "w") as f:
f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
# Summary statistics
total_records = len(global_df)
records_with_adjusted = global_df['adjusted_apr'].notna().sum()
pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
f.write(f"Total APR records: {total_records}\n")
f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
# First and last data points
if records_with_adjusted > 0:
has_adjusted = global_df[global_df['adjusted_apr'].notna()]
first_date = has_adjusted['timestamp'].min()
last_date = has_adjusted['timestamp'].max()
f.write(f"First adjusted_apr record: {first_date}\n")
f.write(f"Last adjusted_apr record: {last_date}\n")
f.write(f"Date range: {(last_date - first_date).days} days\n\n")
# Agent statistics
f.write("===== AGENT STATISTICS =====\n\n")
# Group by agent
agent_stats = []
for agent_id in global_df['agent_id'].unique():
agent_data = global_df[global_df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
total_agent_records = len(agent_data)
agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
agent_stats.append({
'agent_id': agent_id,
'agent_name': agent_name,
'total_records': total_agent_records,
'with_adjusted': agent_with_adjusted,
'coverage_pct': coverage_pct
})
# Sort by coverage percentage (descending)
agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
# Write agent statistics
for agent in agent_stats:
f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
f.write(f" Records: {agent['total_records']}\n")
f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
# If agent has adjusted data, show date range
agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if not agent_adjusted.empty:
first = agent_adjusted['timestamp'].min()
last = agent_adjusted['timestamp'].max()
f.write(f" First adjusted_apr: {first}\n")
f.write(f" Last adjusted_apr: {last}\n")
f.write("\n")
# Check for May 10th cutoff issue
f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
# Calculate coverage before and after
before_total = len(before_cutoff)
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
after_total = len(after_cutoff)
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
f.write(f"Before May 10th, 2025:\n")
f.write(f" Records: {before_total}\n")
f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
f.write(f"After May 10th, 2025:\n")
f.write(f" Records: {after_total}\n")
f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")
# Check for agents that had data before but not after
if before_total > 0 and after_total > 0:
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
new_after = agents_after - agents_before
if missing_after:
f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
# For each missing agent, show the last date with adjusted_apr
for agent_id in missing_after:
agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
(before_cutoff['adjusted_apr'].notna())]
if not agent_data.empty:
last_date = agent_data['timestamp'].max()
agent_name = agent_data['agent_name'].iloc[0]
f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
if new_after:
f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")
logger.info(f"Adjusted APR report generated: {report_path}")
return report_path