File size: 11,293 Bytes
2425de8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging

# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)

# Spacing between consecutive generated samples.
_SAMPLE_STEP = timedelta(minutes=10)
# Inclusive clamp range applied to generated APR and adjusted-APR values.
_APR_BOUNDS = (-0.5, 1.0)
# Inclusive clamp range applied to generated ROI values. The original note in
# this file says app.py multiplies ROI by 100, giving a displayed range of -1..0.
_ROI_BOUNDS = (-0.01, 0.0)


def _clamp(value, bounds):
    """Clamp ``value`` into the inclusive ``(low, high)`` range ``bounds``."""
    low, high = bounds
    return max(low, min(high, value))


def _latest_value(existing_data, agent_id, metric_type, column):
    """Return ``column`` from the agent's most recent ``metric_type`` row.

    "Most recent" is decided by the ``timestamp`` column rather than by row
    position, so the result is correct even when ``existing_data`` is not
    sorted. Returns ``None`` when there is no matching row or the value is NaN.
    """
    rows = existing_data[(existing_data['agent_id'] == agent_id) &
                         (existing_data['metric_type'] == metric_type)]
    if rows.empty:
        return None
    # Stable sort keeps the original row order among equal timestamps, so
    # already-sorted input yields the same row that ``iloc[-1]`` used to pick.
    # Rows with a missing timestamp are pushed to the front so they never win.
    rows = rows.sort_values('timestamp', kind='mergesort', na_position='first')
    value = rows[column].iloc[-1]
    return None if pd.isna(value) else value


def _trend_drift(num_points):
    """Return ``num_points`` per-step drift values built from 3-5 trend periods.

    Each period has a random direction (down / sideways / up) and strength.
    The final period absorbs the remainder of the integer division so every
    index belongs to exactly one trend.
    """
    num_trends = random.randint(3, 5)
    period_length = num_points // num_trends
    drift = []
    for period in range(num_trends):
        direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
        strength = random.uniform(0.01, 0.03)  # small steps look more natural
        if period == num_trends - 1:
            length = num_points - len(drift)
        else:
            length = period_length
        drift.extend([direction * strength] * length)
    return drift


def _generate_apr_walk(start_value, num_points):
    """Return ``num_points`` APR values: a smoothed random walk from ``start_value``.

    The first element is ``start_value`` itself (unclamped); each later element
    adds trend drift, Gaussian noise and 30% of the previous step as momentum,
    then is clamped to ``_APR_BOUNDS``.
    """
    drift = _trend_drift(num_points)
    values = [start_value]
    for i in range(1, num_points):
        # No previous step exists for the very first generated change.
        momentum = 0.3 * (values[-1] - values[-2]) if i > 1 else 0.0
        noise = random.normalvariate(0, 0.01)
        values.append(_clamp(values[-1] + drift[i] + noise + momentum, _APR_BOUNDS))
    return values


def _generate_adjusted_apr(apr_values):
    """Return adjusted-APR values tracking ``apr_values`` with a 0.05-0.15 offset.

    The sign of the offset is re-drawn every fifth point so the adjusted series
    sits sometimes above and sometimes below the APR series.
    """
    adjusted = []
    sign = 1
    for i, apr_value in enumerate(apr_values):
        if i % 5 == 0:  # always true for i == 0, so ``sign`` is drawn before use
            sign = 1 if random.random() > 0.5 else -1
        offset = sign * random.uniform(0.05, 0.15)
        adjusted.append(_clamp(apr_value + offset, _APR_BOUNDS))
    return adjusted


def _generate_roi_path(last_roi, num_points):
    """Return ``num_points`` ROI values wandering between shuffled targets.

    The path starts at ``last_roi`` when it lies inside ``_ROI_BOUNDS`` and at
    the middle of the range (-0.005) otherwise, then moves segment by segment
    toward five shuffled targets, with small uniform noise on every step.
    """
    low, high = _ROI_BOUNDS
    if last_roi is None or not (low <= last_roi <= high):
        current = -0.005
    else:
        current = last_roi
    values = [current]

    # Five evenly spaced targets from the lower bound up to, but excluding,
    # the upper bound: a target of exactly 0 would pin the series to the clamp.
    num_targets = 5
    targets = [low + k * (high - low) / num_targets for k in range(num_targets)]
    random.shuffle(targets)

    segment_length = num_points // num_targets
    if segment_length > 0:
        for target in targets:
            # Per-step change is fixed at the start of each segment.
            step_change = (target - values[-1]) / segment_length
            for _ in range(segment_length):
                jitter = random.uniform(-0.0005, 0.0005)
                values.append(_clamp(values[-1] + step_change + jitter, _ROI_BOUNDS))

    # Integer division may leave the path short; pad with a small random walk.
    while len(values) < num_points:
        values.append(_clamp(values[-1] + random.uniform(-0.001, 0.001), _ROI_BOUNDS))

    return values[:num_points]


def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    Samples are produced every 10 minutes, starting 10 minutes after the latest
    timestamp in ``existing_data`` (or 30 days before ``end_time`` when there is
    no existing data) and ending at ``end_time``. For every agent and every
    timestamp two rows are emitted: one with ``metric_type == 'APR'`` and one
    with ``metric_type == 'ROI'``.

    Values come from the global ``random`` module, so calling ``random.seed``
    beforehand makes the output reproducible.

    Args:
        existing_data: DataFrame containing the existing data, with columns
            ``timestamp``, ``agent_id``, ``agent_name``, ``metric_type``,
            ``apr`` and ``roi``. ``None`` is treated like an empty DataFrame.
        end_time: Optional end time. Defaults to the current time, expressed
            in the timezone of the existing timestamps when they are tz-aware.

    Returns:
        DataFrame with dummy data points (columns ``timestamp``, ``apr``,
        ``adjusted_apr``, ``roi``, ``agent_id``, ``agent_name``, ``is_dummy``,
        ``metric_type``), or an empty DataFrame when no new points are needed.
    """
    has_history = existing_data is not None and not existing_data.empty
    latest = existing_data['timestamp'].max() if has_history else None

    if end_time is None:
        # Match the timezone-awareness of the existing timestamps; comparing a
        # naive "now" against tz-aware timestamps would raise TypeError.
        end_time = datetime.now(tz=getattr(latest, 'tzinfo', None))

    if has_history:
        start_time = latest + _SAMPLE_STEP
    else:
        # If no existing data, start from 30 days ago
        start_time = end_time - timedelta(days=30)

    # Generate timestamps with 10-minute intervals
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += _SAMPLE_STEP

    if not timestamps:
        return pd.DataFrame()  # No new data needed

    if has_history:
        unique_agents = (existing_data[['agent_id', 'agent_name']]
                         .drop_duplicates()
                         .to_dict('records'))
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    num_points = len(timestamps)
    dummy_data_list = []

    for agent in unique_agents:
        agent_id = agent['agent_id']
        agent_name = agent['agent_name']

        # Continue from the most recent real values for this agent, if any.
        last_apr = None
        last_roi = None
        if has_history:
            last_apr = _latest_value(existing_data, agent_id, 'APR', 'apr')
            last_roi = _latest_value(existing_data, agent_id, 'ROI', 'roi')

        if last_apr is None:
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero

        apr_values = _generate_apr_walk(last_apr, num_points)
        adjusted_apr_values = _generate_adjusted_apr(apr_values)
        roi_values = _generate_roi_path(last_roi, num_points)

        for i, timestamp in enumerate(timestamps):
            # APR data
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent_name,
                'is_dummy': True,
                'metric_type': 'APR'
            })
            # ROI data
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent_name,
                'is_dummy': True,
                'metric_type': 'ROI'
            })

    return pd.DataFrame(dummy_data_list)