""" Integration tests for the Daily Household Electricity Consumption Predictor. This module contains integration tests that test the complete workflow from data generation through model training to prediction. """ import pytest import pandas as pd import numpy as np import tempfile import os from src.data_generator import DataGenerator from src.model import ElectricityConsumptionModel from src.app import ElectricityPredictorApp class TestIntegration: """Integration tests for the complete system.""" def setup_method(self): """Set up test environment for each test method.""" self.generator = DataGenerator(seed=42) self.model = ElectricityConsumptionModel() self.app = ElectricityPredictorApp() def test_complete_workflow(self): """Test the complete workflow from data generation to prediction.""" # Step 1: Generate data data = self.generator.generate_data(n_samples=1000, noise_level=0.1) assert len(data) == 1000 assert all( col in data.columns for col in ["temperature", "day_of_week", "major_event", "consumption_kwh"] ) # Step 2: Split data train_data, val_data, test_data = self.generator.split_data(data) assert len(train_data) + len(val_data) + len(test_data) == len(data) # Step 3: Train model X_train = train_data.drop("consumption_kwh", axis=1) y_train = train_data[["consumption_kwh"]] train_metrics = self.model.train(X_train, y_train) assert self.model.is_trained assert "train_r2" in train_metrics assert train_metrics["train_r2"] > 0.3 # Reasonable performance # Step 4: Evaluate model X_test = test_data.drop("consumption_kwh", axis=1) y_test = test_data[["consumption_kwh"]] test_metrics = self.model.evaluate(X_test, y_test) assert "test_r2" in test_metrics assert test_metrics["test_r2"] > 0.3 # Reasonable performance # Step 5: Make predictions prediction1 = self.model.predict(25.0, "Monday", 0) prediction2 = self.model.predict(30.0, "Saturday", 1) assert prediction1 > 0 assert prediction2 > 0 assert ( prediction2 > prediction1 ) # Higher temp + weekend + event should increase consumption def test_app_integration(self): """Test the complete app workflow.""" # Test data generation and training through the app data_info, training_metrics, evaluation_metrics = self.app.generate_and_train( n_samples=500, noise_level=0.1, train_size=0.7, val_size=0.15, test_size=0.15, ) assert self.app.is_model_trained assert "Data Generated Successfully!" in data_info assert "Training Metrics:" in training_metrics assert "Test Set Evaluation:" in evaluation_metrics # Test prediction through the app prediction_result = self.app.predict_consumption(25.0, "Monday", False) assert "Estimated Daily Electricity Consumption:" in prediction_result assert "Temperature: 25.0°C" in prediction_result # Test model info through the app model_info = self.app.get_model_info() assert "Model Information:" in model_info assert "Feature Coefficients:" in model_info def test_model_persistence(self): """Test model saving and loading.""" # Generate data and train model data = self.generator.generate_data(n_samples=500) train_data, _, _ = self.generator.split_data(data) X_train = train_data.drop("consumption_kwh", axis=1) y_train = train_data[["consumption_kwh"]] self.model.train(X_train, y_train) # Save model with tempfile.NamedTemporaryFile(suffix=".joblib", delete=False) as tmp_file: model_path = tmp_file.name try: self.model.save_model(model_path) assert os.path.exists(model_path) # Load model in new instance new_model = ElectricityConsumptionModel() new_model.load_model(model_path) assert new_model.is_trained # Test predictions are identical pred1 = self.model.predict(25.0, "Monday", 0) pred2 = new_model.predict(25.0, "Monday", 0) assert abs(pred1 - pred2) < 1e-10 finally: if os.path.exists(model_path): os.unlink(model_path) def test_data_persistence(self): """Test data saving and loading.""" # Generate data data = self.generator.generate_data(n_samples=100) # Save data with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file: data_path = tmp_file.name try: self.generator.save_data(data, data_path) assert os.path.exists(data_path) # Load data loaded_data = self.generator.load_data(data_path) # Check data is identical pd.testing.assert_frame_equal(data, loaded_data) finally: if os.path.exists(data_path): os.unlink(data_path) def test_model_performance_consistency(self): """Test that model performance is consistent across runs.""" # Generate data data = self.generator.generate_data(n_samples=1000, noise_level=0.1) train_data, _, test_data = self.generator.split_data(data) # Train model multiple times with same data X_train = train_data.drop("consumption_kwh", axis=1) y_train = train_data[["consumption_kwh"]] X_test = test_data.drop("consumption_kwh", axis=1) y_test = test_data[["consumption_kwh"]] r2_scores = [] for _ in range(3): model = ElectricityConsumptionModel() model.train(X_train, y_train) metrics = model.evaluate(X_test, y_test) r2_scores.append(metrics["test_r2"]) # R² scores should be very similar (within 0.01) assert max(r2_scores) - min(r2_scores) < 0.01 def test_feature_importance_consistency(self): """Test that feature importance is consistent with domain knowledge.""" # Generate data and train model data = self.generator.generate_data(n_samples=1000) train_data, _, _ = self.generator.split_data(data) X_train = train_data.drop("consumption_kwh", axis=1) y_train = train_data[["consumption_kwh"]] self.model.train(X_train, y_train) # Get coefficients coefficients = self.model.get_model_coefficients() # Find temperature coefficient temp_idx = coefficients["feature_names"].index("temperature") temp_coef = coefficients["coefficients"][temp_idx] # Find major event coefficient event_idx = coefficients["feature_names"].index("major_event") event_coef = coefficients["coefficients"][event_idx] # Temperature should have positive effect (higher temp = higher consumption) assert temp_coef > 0 # Major event should have positive effect (events increase consumption) assert event_coef > 0 def test_prediction_bounds(self): """Test that predictions are within reasonable bounds.""" # Generate data and train model data = self.generator.generate_data(n_samples=1000) train_data, _, _ = self.generator.split_data(data) X_train = train_data.drop("consumption_kwh", axis=1) y_train = train_data[["consumption_kwh"]] self.model.train(X_train, y_train) # Test predictions across different inputs predictions = [] for temp in [15, 20, 25, 30, 35]: for day in [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ]: for event in [0, 1]: pred = self.model.predict(temp, day, event) predictions.append(pred) # All predictions should be positive assert all(p > 0 for p in predictions) # Predictions should be within reasonable range (5-50 kWh) assert all(5 <= p <= 50 for p in predictions) def test_data_quality_checks(self): """Test that generated data meets quality requirements.""" # Generate data data = self.generator.generate_data(n_samples=1000) # Check for missing values assert not data.isnull().any().any() # Check data types assert data["temperature"].dtype in [np.float64, np.float32] assert data["day_of_week"].dtype == "object" assert data["major_event"].dtype in [np.int64, np.int32] assert data["consumption_kwh"].dtype in [np.float64, np.float32] # Check value ranges assert data["temperature"].min() >= 15 assert data["temperature"].max() <= 35 assert all(data["major_event"].isin([0, 1])) assert all(data["consumption_kwh"] > 0) # Check day of week values valid_days = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ] assert all(day in valid_days for day in data["day_of_week"].unique()) # Check correlations make sense temp_consumption_corr = data["temperature"].corr(data["consumption_kwh"]) assert temp_consumption_corr > 0 # Positive correlation def test_error_handling(self): """Test error handling in the complete workflow.""" # Test with invalid temperature with pytest.raises(ValueError): self.model.predict(10.0, "Monday", 0) # Temperature too low with pytest.raises(ValueError): self.model.predict(40.0, "Monday", 0) # Temperature too high # Test with invalid day with pytest.raises(ValueError): self.model.predict(25.0, "InvalidDay", 0) # Test with invalid major event with pytest.raises(ValueError): self.model.predict(25.0, "Monday", 2) # Invalid value # Test prediction without training untrained_model = ElectricityConsumptionModel() with pytest.raises(ValueError): untrained_model.predict(25.0, "Monday", 0) def test_app_state_management(self): """Test that app state is properly managed.""" # Initially not trained assert not self.app.is_model_trained # After training self.app.generate_and_train(500, 0.1, 0.7, 0.15, 0.15) assert self.app.is_model_trained # Check that data is stored assert hasattr(self.app, "train_data") assert hasattr(self.app, "val_data") assert hasattr(self.app, "test_data") # Check data sizes assert len(self.app.train_data) > 0 assert len(self.app.val_data) > 0 assert len(self.app.test_data) > 0