lyimo committed on
Commit
8917188
·
verified ·
1 Parent(s): f83de07

Update part1_data.py

Browse files
Files changed (1) hide show
  1. part1_data.py +117 -256
part1_data.py CHANGED
@@ -66,7 +66,7 @@ class TobaccoAnalyzer:
66
  data = response.json()
67
  weather_data = {
68
  'date': date,
69
- 'temperature': float(data['main']['temp']), # Ensure numeric
70
  'humidity': float(data['main']['humidity']),
71
  'rainfall': float(data.get('rain', {}).get('1h', 0)) * 24,
72
  'type': 'historical',
@@ -85,7 +85,6 @@ class TobaccoAnalyzer:
85
  response = requests.get(forecast_url)
86
  if response.status_code == 200:
87
  data = response.json()
88
- # Group forecast data by day
89
  daily_forecasts = {}
90
 
91
  for item in data['list']:
@@ -97,21 +96,25 @@ class TobaccoAnalyzer:
97
  'temps': [],
98
  'humidity': [],
99
  'rainfall': 0,
100
- 'descriptions': []
 
 
101
  }
102
 
103
  daily_forecasts[day_key]['temps'].append(float(item['main']['temp']))
104
  daily_forecasts[day_key]['humidity'].append(float(item['main']['humidity']))
105
  daily_forecasts[day_key]['rainfall'] += float(item.get('rain', {}).get('3h', 0))
106
  daily_forecasts[day_key]['descriptions'].append(item['weather'][0]['description'])
 
 
107
 
108
  # Create daily forecast entries
109
  for day_key, day_data in daily_forecasts.items():
110
  forecast = {
111
  'date': datetime.combine(day_key, datetime.min.time()),
112
  'temperature': np.mean(day_data['temps']),
113
- 'temp_min': min(day_data['temps']),
114
- 'temp_max': max(day_data['temps']),
115
  'humidity': np.mean(day_data['humidity']),
116
  'rainfall': day_data['rainfall'],
117
  'type': 'forecast',
@@ -119,10 +122,39 @@ class TobaccoAnalyzer:
119
  }
120
  forecast_data.append(forecast)
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  except Exception as e:
123
  print(f"Error fetching forecast data: {e}")
124
 
125
- # Combine all data
126
  all_data = pd.DataFrame(historical_data + forecast_data)
127
 
128
  if not all_data.empty:
@@ -134,7 +166,10 @@ class TobaccoAnalyzer:
134
  # Sort by date
135
  all_data = all_data.sort_values('date')
136
 
137
- # Add additional columns
 
 
 
138
  all_data['month'] = all_data['date'].dt.month
139
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
140
 
@@ -143,119 +178,77 @@ class TobaccoAnalyzer:
143
  all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
144
  all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
145
 
146
- # Calculate daily suitability
147
  all_data['daily_suitability'] = self.calculate_daily_suitability(all_data)
148
-
149
- # Calculate NDVI
150
  all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
151
 
152
- # Group by date to get daily values while preserving types
153
- daily_data = pd.DataFrame()
154
- daily_data['date'] = all_data['date'].dt.date.unique()
155
- daily_data = daily_data.set_index('date')
 
 
 
 
 
156
 
157
- # Aggregate numeric columns
158
- numeric_aggs = {
159
- 'temperature': 'mean',
160
- 'humidity': 'mean',
161
- 'rainfall': 'sum',
162
- 'temp_min': 'min',
163
- 'temp_max': 'max',
164
- 'temp_7day_avg': 'last',
165
- 'humidity_7day_avg': 'last',
166
- 'rainfall_7day_avg': 'last',
167
- 'daily_suitability': 'mean',
168
- 'estimated_ndvi': 'mean'
169
- }
170
 
171
- # Aggregate categoric columns
172
- categoric_aggs = {
173
- 'type': 'first',
174
- 'description': 'first',
175
- 'season': 'first'
176
- }
177
 
178
- # Perform aggregations separately
179
- numeric_data = all_data.groupby(all_data['date'].dt.date).agg(numeric_aggs)
180
- categoric_data = all_data.groupby(all_data['date'].dt.date).agg(categoric_aggs)
 
 
181
 
182
- # Combine the results
183
- daily_data = pd.concat([numeric_data, categoric_data], axis=1)
184
- daily_data = daily_data.reset_index()
185
- daily_data['date'] = pd.to_datetime(daily_data['date'])
186
-
187
- return daily_data
188
-
189
- return pd.DataFrame()
190
-
191
- def get_weather_description(self, temp, humidity, rainfall):
192
- """Generate weather description based on conditions"""
193
- if rainfall > 5:
194
- return "Heavy Rain"
195
- elif rainfall > 0:
196
- return "Light Rain"
197
- elif humidity > 80:
198
- return "Humid"
199
- elif temp > 30:
200
- return "Hot"
201
- elif temp < 20:
202
- return "Cool"
203
- else:
204
- return "Fair"
205
 
206
  def estimate_ndvi(self, weather_data):
207
- """Estimate NDVI based on weather conditions with patterns"""
208
- # Base calculation
209
- normalized_temp = (weather_data['temperature'] - 15) / (30 - 15)
210
- normalized_humidity = (weather_data['humidity'] - 50) / (80 - 50)
211
- normalized_rainfall = weather_data['rainfall'] / 5
212
-
213
- # Season adjustment factors
214
- season_factors = {
215
- 'Main': 1.0,
216
- 'Early': 0.8,
217
- 'Late': 0.7,
218
- 'Dry': 0.5
219
- }
220
-
221
- # Apply season adjustments with smooth transitions
222
- season_multiplier = weather_data['season'].map(season_factors)
223
-
224
- # Calculate base NDVI
225
- base_ndvi = (
226
- 0.4 * normalized_temp +
227
- 0.3 * normalized_humidity +
228
- 0.3 * normalized_rainfall
229
- ) * season_multiplier
230
-
231
- # Add slight random variation to make it more realistic
232
- variation = np.random.normal(0, 0.05, size=len(base_ndvi))
233
-
234
- # Combine and clip to valid range
235
- return np.clip(base_ndvi + variation, -1, 1)
236
 
237
- def calculate_daily_suitability(self, df):
238
- """Calculate daily growing suitability with patterns"""
239
- # Temperature suitability
240
- temp_suit = 1 - np.abs((df['temperature'] - 25) / 10) # Optimal at 25°C
241
-
242
- # Humidity suitability
243
- humidity_suit = 1 - np.abs((df['humidity'] - 70) / 30) # Optimal at 70%
244
-
245
- # Rainfall suitability with diminishing returns
246
- rainfall_suit = 1 - np.exp(-df['rainfall'] / 2)
247
-
248
- # Combine with weights and add slight variation
249
- base_suit = (
250
- 0.4 * temp_suit +
251
- 0.3 * humidity_suit +
252
- 0.3 * rainfall_suit
253
- )
254
-
255
- # Add small random variation
256
- variation = np.random.normal(0, 0.05, size=len(base_suit))
257
-
258
- return np.clip(base_suit + variation, 0, 1)
 
259
 
260
  def analyze_trends(self, df):
261
  """Analyze weather trends and patterns"""
@@ -271,24 +264,26 @@ class TobaccoAnalyzer:
271
  'temperature': {
272
  'mean': historical['temperature'].mean(),
273
  'std': historical['temperature'].std(),
274
- 'trend': stats.linregress(range(len(historical)), historical['temperature'])[0],
275
- 'daily_range': (historical['temp_max'] - historical['temp_min']).mean()
276
  },
277
  'humidity': {
278
  'mean': historical['humidity'].mean(),
279
  'std': historical['humidity'].std(),
280
- 'trend': stats.linregress(range(len(historical)), historical['humidity'])[0]
 
281
  },
282
  'rainfall': {
283
  'mean': historical['rainfall'].mean(),
284
  'std': historical['rainfall'].std(),
285
- 'trend': stats.linregress(range(len(historical)), historical['rainfall'])[0],
286
- 'rainy_days': (historical['rainfall'] > 0).sum()
287
  },
288
  'ndvi': {
289
  'mean': historical['estimated_ndvi'].mean(),
290
  'std': historical['estimated_ndvi'].std(),
291
- 'trend': stats.linregress(range(len(historical)), historical['estimated_ndvi'])[0]
 
292
  }
293
  }
294
  }
@@ -297,158 +292,24 @@ class TobaccoAnalyzer:
297
  analysis['forecast'] = {
298
  'temperature': {
299
  'mean': forecast['temperature'].mean(),
300
- 'std': forecast['temperature'].std(),
301
- 'daily_range': (forecast['temp_max'] - forecast['temp_min']).mean()
302
  },
303
  'humidity': {
304
  'mean': forecast['humidity'].mean(),
305
  'std': forecast['humidity'].std()
306
- },
307
  'rainfall': {
308
  'mean': forecast['rainfall'].mean(),
309
- 'std': forecast['rainfall'].std(),
310
- 'rainy_days': (forecast['rainfall'] > 0).sum()
311
  },
312
  'ndvi': {
313
  'mean': forecast['estimated_ndvi'].mean(),
314
  'std': forecast['estimated_ndvi'].std()
315
- },
316
- 'confidence': {
317
- 'short_term': 0.9, # First 5 days
318
- 'medium_term': 0.7, # 6-15 days
319
- 'long_term': 0.5 # Beyond 15 days
320
  }
321
  }
322
-
323
  return analysis
 
324
  except Exception as e:
325
  print(f"Error in trend analysis: {e}")
326
- return None
327
-
328
- def calculate_season_factor(self, date):
329
- """Calculate seasonal influence factor"""
330
- day_of_year = date.timetuple().tm_yday
331
- season_phase = 2 * np.pi * day_of_year / 365
332
-
333
- # Base seasonal factor
334
- base_factor = np.sin(season_phase)
335
-
336
- # Adjust for Tanzania's specific seasons
337
- month = date.month
338
- if month in [12, 1, 2]: # Main growing season
339
- season_modifier = 1.2
340
- elif month in [3, 4, 5]: # Late season
341
- season_modifier = 0.8
342
- elif month in [6, 7, 8]: # Dry season
343
- season_modifier = 0.5
344
- else: # Early season
345
- season_modifier = 0.9
346
-
347
- return base_factor * season_modifier
348
-
349
- def calculate_daily_pattern(self, hour, base_value, amplitude=1.0):
350
- """Calculate daily cyclic pattern"""
351
- hour_phase = 2 * np.pi * hour / 24
352
- return base_value + amplitude * np.sin(hour_phase - np.pi/2)
353
-
354
- def get_weather_risk_factors(self, df):
355
- """Analyze weather-related risk factors"""
356
- risks = []
357
-
358
- # Temperature risks
359
- temp_mean = df['temperature'].mean()
360
- temp_std = df['temperature'].std()
361
- if temp_mean > self.optimal_conditions['temperature']['max']:
362
- risks.append(('High Temperature Risk', 'Average temperature above optimal range'))
363
- elif temp_mean < self.optimal_conditions['temperature']['min']:
364
- risks.append(('Low Temperature Risk', 'Average temperature below optimal range'))
365
- if temp_std > 5:
366
- risks.append(('Temperature Volatility Risk', 'High temperature variations observed'))
367
-
368
- # Humidity risks
369
- humidity_mean = df['humidity'].mean()
370
- if humidity_mean > self.optimal_conditions['humidity']['max']:
371
- risks.append(('High Humidity Risk', 'Average humidity above optimal range'))
372
- elif humidity_mean < self.optimal_conditions['humidity']['min']:
373
- risks.append(('Low Humidity Risk', 'Average humidity below optimal range'))
374
-
375
- # Rainfall risks
376
- daily_rainfall = df.groupby(df['date'].dt.date)['rainfall'].sum()
377
- rainy_days = (daily_rainfall > 0).sum()
378
- total_rainfall = daily_rainfall.sum()
379
-
380
- if total_rainfall < self.optimal_conditions['rainfall']['min'] * len(daily_rainfall):
381
- risks.append(('Drought Risk', 'Insufficient rainfall observed'))
382
- elif total_rainfall > self.optimal_conditions['rainfall']['max'] * len(daily_rainfall):
383
- risks.append(('Flood Risk', 'Excessive rainfall observed'))
384
-
385
- if rainy_days < len(daily_rainfall) * 0.2:
386
- risks.append(('Rainfall Distribution Risk', 'Too few rainy days'))
387
-
388
- # NDVI risks
389
- ndvi_mean = df['estimated_ndvi'].mean()
390
- if ndvi_mean < self.optimal_conditions['ndvi']['min']:
391
- risks.append(('Vegetation Health Risk', 'Low vegetation health indicated by NDVI'))
392
-
393
- # Season-specific risks
394
- current_season = df['season'].iloc[-1]
395
- if current_season == 'Dry':
396
- risks.append(('Seasonal Risk', 'Currently in dry season'))
397
-
398
- return risks
399
-
400
- def calculate_risk_score(self, df):
401
- """Calculate overall risk score based on all factors"""
402
- risk_score = 0
403
- weights = {
404
- 'temperature': 0.3,
405
- 'humidity': 0.2,
406
- 'rainfall': 0.2,
407
- 'ndvi': 0.2,
408
- 'season': 0.1
409
- }
410
-
411
- # Temperature component
412
- temp_mean = df['temperature'].mean()
413
- temp_optimal_range = self.optimal_conditions['temperature']
414
- temp_score = 1 - min(abs(temp_mean - np.mean([temp_optimal_range['min'],
415
- temp_optimal_range['max']])) / 10, 1)
416
-
417
- # Humidity component
418
- humidity_mean = df['humidity'].mean()
419
- humidity_optimal_range = self.optimal_conditions['humidity']
420
- humidity_score = 1 - min(abs(humidity_mean - np.mean([humidity_optimal_range['min'],
421
- humidity_optimal_range['max']])) / 20, 1)
422
-
423
- # Rainfall component
424
- daily_rainfall = df.groupby(df['date'].dt.date)['rainfall'].sum()
425
- rainfall_optimal_range = self.optimal_conditions['rainfall']
426
- rainfall_score = 1 - min(abs(daily_rainfall.mean() - np.mean([rainfall_optimal_range['min'],
427
- rainfall_optimal_range['max']])) / 5, 1)
428
-
429
- # NDVI component
430
- ndvi_mean = df['estimated_ndvi'].mean()
431
- ndvi_optimal_range = self.optimal_conditions['ndvi']
432
- ndvi_score = 1 - min(abs(ndvi_mean - np.mean([ndvi_optimal_range['min'],
433
- ndvi_optimal_range['max']])) / 0.3, 1)
434
-
435
- # Season component
436
- current_season = df['season'].iloc[-1]
437
- season_scores = {
438
- 'Main': 1.0,
439
- 'Early': 0.8,
440
- 'Late': 0.6,
441
- 'Dry': 0.4
442
- }
443
- season_score = season_scores.get(current_season, 0.5)
444
-
445
- # Calculate weighted score
446
- risk_score = (
447
- weights['temperature'] * temp_score +
448
- weights['humidity'] * humidity_score +
449
- weights['rainfall'] * rainfall_score +
450
- weights['ndvi'] * ndvi_score +
451
- weights['season'] * season_score
452
- )
453
-
454
- return np.clip(risk_score, 0, 1)
 
66
  data = response.json()
67
  weather_data = {
68
  'date': date,
69
+ 'temperature': float(data['main']['temp']),
70
  'humidity': float(data['main']['humidity']),
71
  'rainfall': float(data.get('rain', {}).get('1h', 0)) * 24,
72
  'type': 'historical',
 
85
  response = requests.get(forecast_url)
86
  if response.status_code == 200:
87
  data = response.json()
 
88
  daily_forecasts = {}
89
 
90
  for item in data['list']:
 
96
  'temps': [],
97
  'humidity': [],
98
  'rainfall': 0,
99
+ 'descriptions': [],
100
+ 'temp_mins': [],
101
+ 'temp_maxs': []
102
  }
103
 
104
  daily_forecasts[day_key]['temps'].append(float(item['main']['temp']))
105
  daily_forecasts[day_key]['humidity'].append(float(item['main']['humidity']))
106
  daily_forecasts[day_key]['rainfall'] += float(item.get('rain', {}).get('3h', 0))
107
  daily_forecasts[day_key]['descriptions'].append(item['weather'][0]['description'])
108
+ daily_forecasts[day_key]['temp_mins'].append(float(item['main']['temp_min']))
109
+ daily_forecasts[day_key]['temp_maxs'].append(float(item['main']['temp_max']))
110
 
111
  # Create daily forecast entries
112
  for day_key, day_data in daily_forecasts.items():
113
  forecast = {
114
  'date': datetime.combine(day_key, datetime.min.time()),
115
  'temperature': np.mean(day_data['temps']),
116
+ 'temp_min': min(day_data['temp_mins']),
117
+ 'temp_max': max(day_data['temp_maxs']),
118
  'humidity': np.mean(day_data['humidity']),
119
  'rainfall': day_data['rainfall'],
120
  'type': 'forecast',
 
122
  }
123
  forecast_data.append(forecast)
124
 
125
+ # Generate extended forecast using trends
126
+ if forecast_data:
127
+ last_date = max(d['date'] for d in forecast_data)
128
+ temp_trend = 0
129
+ humidity_trend = 0
130
+ rainfall_trend = 0
131
+
132
+ if len(historical_data) > 1:
133
+ historical_df = pd.DataFrame(historical_data)
134
+ temp_trend = stats.linregress(range(len(historical_df)), historical_df['temperature'])[0]
135
+ humidity_trend = stats.linregress(range(len(historical_df)), historical_df['humidity'])[0]
136
+ rainfall_trend = stats.linregress(range(len(historical_df)), historical_df['rainfall'])[0]
137
+
138
+ for day in range(1, forecast_days - len(forecast_data)):
139
+ base_forecast = forecast_data[-1]
140
+ date = last_date + timedelta(days=day)
141
+
142
+ extended_forecast = {
143
+ 'date': date,
144
+ 'temperature': base_forecast['temperature'] + temp_trend * day,
145
+ 'temp_min': base_forecast['temp_min'] + temp_trend * day,
146
+ 'temp_max': base_forecast['temp_max'] + temp_trend * day,
147
+ 'humidity': base_forecast['humidity'] + humidity_trend * day,
148
+ 'rainfall': max(0, base_forecast['rainfall'] + rainfall_trend * day),
149
+ 'type': 'forecast_extended',
150
+ 'description': 'Extended Forecast'
151
+ }
152
+ forecast_data.append(extended_forecast)
153
+
154
  except Exception as e:
155
  print(f"Error fetching forecast data: {e}")
156
 
157
+ # Combine and process all data
158
  all_data = pd.DataFrame(historical_data + forecast_data)
159
 
160
  if not all_data.empty:
 
166
  # Sort by date
167
  all_data = all_data.sort_values('date')
168
 
169
+ # Calculate temperature range
170
+ all_data['temp_range'] = all_data['temp_max'] - all_data['temp_min']
171
+
172
+ # Add analysis columns
173
  all_data['month'] = all_data['date'].dt.month
174
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
175
 
 
178
  all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
179
  all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
180
 
181
+ # Calculate daily suitability and NDVI
182
  all_data['daily_suitability'] = self.calculate_daily_suitability(all_data)
 
 
183
  all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
184
 
185
+ return all_data
186
+
187
+ return pd.DataFrame()
188
+
189
+ def calculate_daily_suitability(self, df):
190
+ """Calculate daily growing suitability"""
191
+ try:
192
+ # Temperature suitability
193
+ temp_suit = 1 - np.clip(abs(df['temperature'] - 25) / 10, 0, 1)
194
 
195
+ # Temperature range suitability
196
+ temp_range_suit = 1 - np.clip(df['temp_range'] / 15, 0, 1)
 
 
 
 
 
 
 
 
 
 
 
197
 
198
+ # Humidity suitability
199
+ humidity_suit = 1 - np.clip(abs(df['humidity'] - 70) / 30, 0, 1)
 
 
 
 
200
 
201
+ # Rainfall suitability
202
+ daily_rainfall_target = (self.optimal_conditions['rainfall']['min'] +
203
+ self.optimal_conditions['rainfall']['max']) / 2
204
+ rainfall_suit = 1 - np.clip(abs(df['rainfall'] - daily_rainfall_target) /
205
+ daily_rainfall_target, 0, 1)
206
 
207
+ # Combine scores with weights
208
+ suitability = (
209
+ 0.35 * temp_suit +
210
+ 0.15 * temp_range_suit +
211
+ 0.25 * humidity_suit +
212
+ 0.25 * rainfall_suit
213
+ )
214
+
215
+ return np.clip(suitability, 0, 1)
216
+
217
+ except Exception as e:
218
+ print(f"Error calculating suitability: {e}")
219
+ return pd.Series(0.5, index=df.index)
 
 
 
 
 
 
 
 
 
 
220
 
221
  def estimate_ndvi(self, weather_data):
222
+ """Estimate NDVI based on weather conditions"""
223
+ try:
224
+ # Normalize weather parameters
225
+ normalized_temp = (weather_data['temperature'] - 15) / (30 - 15)
226
+ normalized_humidity = (weather_data['humidity'] - 50) / (80 - 50)
227
+ normalized_rainfall = weather_data['rainfall'] / 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
+ # Season adjustment factors
230
+ season_factors = {
231
+ 'Main': 1.0,
232
+ 'Early': 0.8,
233
+ 'Late': 0.7,
234
+ 'Dry': 0.5
235
+ }
236
+
237
+ # Apply season adjustments
238
+ season_multiplier = weather_data['season'].map(season_factors)
239
+
240
+ # Calculate estimated NDVI
241
+ estimated_ndvi = (
242
+ 0.4 * normalized_temp +
243
+ 0.3 * normalized_humidity +
244
+ 0.3 * normalized_rainfall
245
+ ) * season_multiplier
246
+
247
+ return np.clip(estimated_ndvi, -1, 1)
248
+
249
+ except Exception as e:
250
+ print(f"Error estimating NDVI: {e}")
251
+ return pd.Series(0, index=weather_data.index)
252
 
253
  def analyze_trends(self, df):
254
  """Analyze weather trends and patterns"""
 
264
  'temperature': {
265
  'mean': historical['temperature'].mean(),
266
  'std': historical['temperature'].std(),
267
+ 'trend': stats.linregress(range(len(historical)),
268
+ historical['temperature'])[0]
269
  },
270
  'humidity': {
271
  'mean': historical['humidity'].mean(),
272
  'std': historical['humidity'].std(),
273
+ 'trend': stats.linregress(range(len(historical)),
274
+ historical['humidity'])[0]
275
  },
276
  'rainfall': {
277
  'mean': historical['rainfall'].mean(),
278
  'std': historical['rainfall'].std(),
279
+ 'trend': stats.linregress(range(len(historical)),
280
+ historical['rainfall'])[0]
281
  },
282
  'ndvi': {
283
  'mean': historical['estimated_ndvi'].mean(),
284
  'std': historical['estimated_ndvi'].std(),
285
+ 'trend': stats.linregress(range(len(historical)),
286
+ historical['estimated_ndvi'])[0]
287
  }
288
  }
289
  }
 
292
  analysis['forecast'] = {
293
  'temperature': {
294
  'mean': forecast['temperature'].mean(),
295
+ 'std': forecast['temperature'].std()
 
296
  },
297
  'humidity': {
298
  'mean': forecast['humidity'].mean(),
299
  'std': forecast['humidity'].std()
300
+ },
301
  'rainfall': {
302
  'mean': forecast['rainfall'].mean(),
303
+ 'std': forecast['rainfall'].std()
 
304
  },
305
  'ndvi': {
306
  'mean': forecast['estimated_ndvi'].mean(),
307
  'std': forecast['estimated_ndvi'].std()
 
 
 
 
 
308
  }
309
  }
310
+
311
  return analysis
312
+
313
  except Exception as e:
314
  print(f"Error in trend analysis: {e}")
315
+ return None