import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging


logger = logging.getLogger(__name__)


def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking dummy data that continues on from the existing
    data, with an adjusted APR that follows the APR series at a small offset.

    Args:
        existing_data: DataFrame containing the existing data
        end_time: Optional end time (defaults to the current time)

    Returns:
        DataFrame with dummy APR and ROI data points
    """
    if end_time is None:
        end_time = datetime.now()

    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        start_time = end_time - timedelta(days=30)

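    # Build a 10-minute timestamp grid from start_time up to end_time.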
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)

    if not timestamps:
        return pd.DataFrame()

    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    dummy_data_list = []

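    # Generate an APR series and an ROI series for each agent, continuing from
    # the agent's most recent recorded values where they exist.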
    for agent in unique_agents:
        agent_id = agent['agent_id']

        last_apr = None
        last_adjusted_apr = None
        last_roi = None

        if not existing_data.empty:
            agent_apr_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'APR')]
            if not agent_apr_data.empty:
                last_apr = agent_apr_data['apr'].iloc[-1]
                last_adjusted_apr = agent_apr_data['adjusted_apr'].iloc[-1]

            agent_roi_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'ROI')]
            if not agent_roi_data.empty:
                last_roi = agent_roi_data['roi'].iloc[-1]

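        # Fall back to small random seed values when there is no usable history.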
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)

        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            if random.random() > 0.5:
                last_adjusted_apr = last_apr + random.uniform(0.05, 0.15)
            else:
                last_adjusted_apr = last_apr - random.uniform(0.05, 0.15)
            last_adjusted_apr = max(-0.5, min(1.0, last_adjusted_apr))

        if last_roi is None or pd.isna(last_roi):
            last_roi = random.uniform(-0.1, 0.1)

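        # APR: random walk seeded with the last known value and shaped by a few
        # randomly chosen trend periods (up, down, or flat).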
        apr_values = [last_apr]

        num_points = len(timestamps)
        trend_periods = []

        num_trends = random.randint(3, 5)
        period_length = num_points // num_trends

        for i in range(num_trends):
            direction = random.choice([-1, 0, 1])
            strength = random.uniform(0.01, 0.03)

            start_idx = i * period_length
            end_idx = min((i + 1) * period_length, num_points)

            trend_periods.append({
                'start': start_idx,
                'end': end_idx,
                'direction': direction,
                'strength': strength
            })

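        # Walk the series forward one point at a time: trend drift plus Gaussian
        # noise plus a momentum term, clamped to the [-0.5, 1.0] range.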
        for i in range(1, num_points):
            current_trend = None
            for trend in trend_periods:
                if trend['start'] <= i < trend['end']:
                    current_trend = trend
                    break

            if current_trend is None:
                current_trend = {'direction': 0, 'strength': 0.01}

            base_change = current_trend['direction'] * current_trend['strength']
            random_change = random.normalvariate(0, 0.01)
            prev_change = 0 if i == 1 else apr_values[i-1] - apr_values[i-2]
            momentum = 0.3 * prev_change

            total_change = base_change + random_change + momentum
            new_value = apr_values[i-1] + total_change
            new_value = max(-0.5, min(1.0, new_value))
            apr_values.append(new_value)

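        # Adjusted APR tracks the APR values at a small offset; the offset
        # direction is re-drawn every five points.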
        adjusted_apr_values = []
        for i, apr_value in enumerate(apr_values):
            if i % 5 == 0:
                offset_direction = 1 if random.random() > 0.5 else -1

            offset = offset_direction * random.uniform(0.05, 0.15)
            adjusted_value = apr_value + offset
            adjusted_value = max(-0.5, min(1.0, adjusted_value))
            adjusted_apr_values.append(adjusted_value)

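        # ROI: drift through a shuffled set of small negative targets, keeping
        # every value inside the [-0.01, 0] band.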
        target_points = []
        for i in range(5):
            target = -0.01 + (i * 0.0025)
            target_points.append(target)

        random.shuffle(target_points)

        segment_length = num_points // len(target_points)

        roi_values = []

        if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
            current_value = -0.005
        else:
            current_value = last_roi

        roi_values.append(current_value)

        for segment_idx, target in enumerate(target_points):
            start_idx = segment_idx * segment_length
            end_idx = min((segment_idx + 1) * segment_length, num_points)

            steps = end_idx - start_idx
            if steps <= 0:
                continue

            current_value = roi_values[-1]
            step_change = (target - current_value) / steps

            for step in range(steps):
                base_change = step_change
                random_factor = random.uniform(-0.0005, 0.0005)
                new_value = current_value + base_change + random_factor
                new_value = max(-0.01, min(0, new_value))
                roi_values.append(new_value)
                current_value = new_value

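        # If the segments did not cover every point, pad with a gentle random
        # walk so there is one ROI value per timestamp (plus the initial seed).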
        while len(roi_values) < num_points + 1:
            last_value = roi_values[-1]
            new_value = last_value + random.uniform(-0.001, 0.001)
            new_value = max(-0.01, min(0, new_value))
            roi_values.append(new_value)

        roi_values = roi_values[:num_points + 1]

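        # Emit one APR row and one ROI row per timestamp, all flagged as dummy data.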
        for i, timestamp in enumerate(timestamps):
            dummy_apr = {
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            }
            dummy_data_list.append(dummy_apr)

            dummy_roi = {
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            }
            dummy_data_list.append(dummy_roi)

    return pd.DataFrame(dummy_data_list)
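

# Minimal usage sketch (illustrative only). The column list mirrors the fields
# this module emits; 'seed_columns', 'empty_history', and 'dummy_df' are names
# invented for this example, not part of the application.
if __name__ == "__main__":
    seed_columns = ['timestamp', 'apr', 'adjusted_apr', 'roi',
                    'agent_id', 'agent_name', 'is_dummy', 'metric_type']
    empty_history = pd.DataFrame(columns=seed_columns)

    # With no history, the generator falls back to the built-in "Dummy Agent"
    # and produces roughly 30 days of 10-minute APR and ROI points.
    dummy_df = generate_continuous_random_data(empty_history)
    print(dummy_df[['timestamp', 'agent_id', 'metric_type']].head())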