File size: 11,293 Bytes
2425de8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 |
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging
# Module-level logger named after this module, per the standard logging convention.
# (Handlers/levels are expected to be configured by the application entry point.)
logger = logging.getLogger(__name__)
def _build_timestamps(start_time, end_time):
    """Return timestamps from start_time through end_time (inclusive) at 10-minute steps."""
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)
    return timestamps


def _last_agent_values(existing_data, agent_id):
    """Return (last_apr, last_roi) for one agent from the real data.

    Missing/NaN values are replaced with small random starting values near zero
    so the generated series has a plausible anchor point.
    """
    last_apr = None
    last_roi = None
    if not existing_data.empty:
        apr_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'APR')]
        if not apr_rows.empty:
            last_apr = apr_rows['apr'].iloc[-1]
        roi_rows = existing_data[(existing_data['agent_id'] == agent_id) &
                                 (existing_data['metric_type'] == 'ROI')]
        if not roi_rows.empty:
            last_roi = roi_rows['roi'].iloc[-1]
    if last_apr is None or pd.isna(last_apr):
        last_apr = random.uniform(-0.1, 0.1)  # start close to zero
    if last_roi is None or pd.isna(last_roi):
        last_roi = random.uniform(-0.1, 0.1)  # may be out of ROI range; re-anchored later
    return last_apr, last_roi


def _generate_apr_series(last_apr, num_points):
    """Generate num_points APR values as a trend-following random walk.

    The walk starts at last_apr for continuity with real data, follows 3-5
    randomly chosen trend periods (up/down/sideways), adds Gaussian noise and
    30% momentum from the previous step, and is clamped to [-0.5, 1.0].
    """
    values = [last_apr]
    num_trends = random.randint(3, 5)
    # Integer division: may be 0 when num_points < num_trends, in which case
    # every point falls outside all periods and uses the neutral fallback below.
    period_length = num_points // num_trends
    trend_periods = [
        {
            'start': i * period_length,
            'end': min((i + 1) * period_length, num_points),
            'direction': random.choice([-1, 0, 1]),   # -1: down, 0: sideways, 1: up
            'strength': random.uniform(0.01, 0.03),   # small changes look more natural
        }
        for i in range(num_trends)
    ]
    for i in range(1, num_points):
        current_trend = next(
            (t for t in trend_periods if t['start'] <= i < t['end']),
            {'direction': 0, 'strength': 0.01},  # neutral fallback for uncovered indices
        )
        base_change = current_trend['direction'] * current_trend['strength']
        random_change = random.normalvariate(0, 0.01)  # Gaussian noise for natural jitter
        # 30% of the previous step as momentum keeps the curve smooth.
        prev_change = 0 if i == 1 else values[i - 1] - values[i - 2]
        new_value = values[i - 1] + base_change + random_change + 0.3 * prev_change
        values.append(max(-0.5, min(1.0, new_value)))  # clamp to [-0.5, 1.0]
    return values


def _derive_adjusted_apr(apr_values):
    """Derive adjusted APR that tracks APR with a small offset.

    The offset direction and magnitude (0.05-0.15) are re-drawn every 5 points
    so the adjusted line crosses above and below APR; clamped to [-0.5, 1.0].
    """
    adjusted = []
    offset = 0.0
    for i, apr_value in enumerate(apr_values):
        if i % 5 == 0:  # periodically re-draw the offset (always set at i == 0)
            direction = 1 if random.random() > 0.5 else -1
            offset = direction * random.uniform(0.05, 0.15)
        adjusted.append(max(-0.5, min(1.0, apr_value + offset)))
    return adjusted


def _generate_roi_series(last_roi, num_points):
    """Generate exactly num_points ROI values in [-0.01, 0].

    Note: ROI is multiplied by 100 downstream, so values in [-0.01, 0] yield
    final values in [-1, 0]. Rather than a pure walk from last_roi, the series
    moves through 5 shuffled targets spread across the range for good coverage.
    """
    # Targets from -0.01 to -0.0025 (avoids exactly 0), shuffled so the
    # pattern is less predictable.
    targets = [-0.01 + i * 0.0025 for i in range(5)]
    random.shuffle(targets)
    segment_length = num_points // len(targets)

    # Anchor on the last real value only if it already lies inside the range.
    if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
        current_value = -0.005  # middle of the range
    else:
        current_value = last_roi
    values = [current_value]

    # Each segment drifts linearly toward its target with small noise.
    for segment_idx, target in enumerate(targets):
        start_idx = segment_idx * segment_length
        end_idx = min((segment_idx + 1) * segment_length, num_points)
        steps = end_idx - start_idx
        if steps <= 0:
            continue  # segment has no points (num_points < 5)
        current_value = values[-1]
        step_change = (target - current_value) / steps
        for _ in range(steps):
            new_value = current_value + step_change + random.uniform(-0.0005, 0.0005)
            new_value = max(-0.01, min(0, new_value))
            values.append(new_value)
            current_value = new_value

    # Pad with small random drift if the segments under-produced, then trim so
    # the result length exactly matches the number of timestamps (the original
    # built num_points + 1 values and silently discarded the last one).
    while len(values) < num_points:
        new_value = values[-1] + random.uniform(-0.001, 0.001)
        values.append(max(-0.01, min(0, new_value)))
    return values[:num_points]


def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    For each agent present in existing_data (or a single dummy agent when it
    is empty), two rows per 10-minute timestamp are produced: one APR row
    (metric_type 'APR', roi is None) and one ROI row (metric_type 'ROI',
    apr/adjusted_apr are None). All rows are flagged with is_dummy=True.

    Args:
        existing_data: DataFrame containing the existing data. Expected columns:
            timestamp, apr, adjusted_apr, roi, agent_id, agent_name, metric_type.
        end_time: Optional end time (defaults to current time).

    Returns:
        DataFrame with dummy data points; empty DataFrame if no new points
        are needed (existing data already reaches end_time).
    """
    if end_time is None:
        end_time = datetime.now()

    # Continue 10 minutes after the latest real point, or backfill 30 days.
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        start_time = end_time - timedelta(days=30)

    timestamps = _build_timestamps(start_time, end_time)
    if not timestamps:
        return pd.DataFrame()  # no new data needed
    num_points = len(timestamps)

    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    dummy_data_list = []
    for agent in unique_agents:
        agent_id = agent['agent_id']
        # Anchor each series on the agent's last real values for continuity.
        # (The original also extracted a last adjusted_apr here, but it was
        # never used: adjusted APR is derived from the APR series below.)
        last_apr, last_roi = _last_agent_values(existing_data, agent_id)
        apr_values = _generate_apr_series(last_apr, num_points)
        adjusted_apr_values = _derive_adjusted_apr(apr_values)
        roi_values = _generate_roi_series(last_roi, num_points)

        for i, timestamp in enumerate(timestamps):
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR',
            })
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI',
            })

    return pd.DataFrame(dummy_data_list)
|