meccatronis committed on
Commit
85b135b
·
verified ·
1 Parent(s): 43e4e00

Upload test_system.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test_system.py +509 -0
test_system.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Comprehensive Test and Validation Script
4
+
5
+ Tests all components of the GPU monitoring and fan control system
6
+ to ensure proper functionality and integration.
7
+ """
8
+
9
+ import sys
10
+ import os
11
+ import time
12
+ import json
13
+ import logging
14
+ import subprocess
15
+ import threading
16
+ from pathlib import Path
17
+ from typing import Dict, List, Any, Optional
18
+
19
+ # Add the project directory to Python path
20
+ sys.path.insert(0, str(Path(__file__).parent))
21
+
22
+ from gpu_monitoring import GPUManager, GPUStatus
23
+ from gpu_fan_controller import FanController, FanMode, ProfileType
24
+ from alert_system import AlertManager, AlertThreshold
25
+ from performance_optimizer import SystemOptimizer
26
+ from web_interface import app as web_app
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class SystemTester:
    """Run smoke tests against each component of the GPU monitoring system.

    Every ``test_*`` method exercises one component, records one or more
    entries through :meth:`log_test`, and returns ``True`` only when all of
    its checks passed.  :meth:`run_all_tests` runs them in a fixed order,
    aggregates the recorded entries and writes the summary to disk.
    """

    # Configuration files that must exist and parse as JSON.  The paths are
    # resolved against the current working directory.
    CONFIG_FILES = (
        "config/fan_profiles.json",
        "config/monitoring.json",
        "config/alerts.json",
        "config/optimization.json",
    )

    # (test-name suffix, URL, human-readable description) for each GET
    # endpoint probed through the web app's test client.
    API_ENDPOINTS = (
        ("Status", "/api/status", "Status endpoint"),
        ("GPUs", "/api/gpus", "GPU list endpoint"),
        ("Fan Profiles", "/api/fan/profiles", "Fan profiles endpoint"),
    )

    # PCI vendor ID string that identifies an AMD device in sysfs.
    AMD_VENDOR_ID = "0x1002"

    def __init__(self):
        """Create an empty result list and configure logging."""
        self.test_results = []
        # Components are created lazily by their respective tests so that
        # a failure in one constructor does not prevent the others running.
        self.gpu_manager = None
        self.fan_controller = None
        self.alert_manager = None
        self.optimizer = None

        self.setup_logging()

    def setup_logging(self):
        """Send log records to ``test_results.log`` and to stdout."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('test_results.log'),
                logging.StreamHandler(sys.stdout),
            ],
        )

    def log_test(self, test_name: str, success: bool, message: str = ""):
        """Log one check outcome and append it to ``self.test_results``.

        Args:
            test_name: Label of the check.
            success: Whether the check passed.
            message: Optional detail shown next to the label.
        """
        status = "PASS" if success else "FAIL"
        logger.info(f"[{status}] {test_name}: {message}")
        self.test_results.append({
            "test": test_name,
            "success": success,
            "message": message,
            "timestamp": time.time(),
        })

    def test_gpu_detection(self) -> bool:
        """Initialize the GPU manager and verify that status can be read.

        Returns:
            ``True`` when the manager initializes and returns non-empty
            status data, ``False`` otherwise (including on any exception).
        """
        try:
            self.gpu_manager = GPUManager()
            if not self.gpu_manager.initialize():
                self.log_test("GPU Detection", False, "Failed to initialize GPU manager")
                return False

            gpus = self.gpu_manager.get_gpu_list()
            # Smoke call: the result is not inspected, but an exception
            # raised here still fails the test via the handler below.
            self.gpu_manager.get_gpu_info()

            self.log_test(
                "GPU Detection",
                True,
                f"Found {len(gpus)} GPU(s): {', '.join(gpus)}"
            )

            status = self.gpu_manager.get_status()
            if not status:
                self.log_test("GPU Status Collection", False, "No status data collected")
                return False

            self.log_test(
                "GPU Status Collection",
                True,
                f"Successfully collected status for {len(status)} GPU(s)"
            )
            return True

        except Exception as e:
            self.log_test("GPU Detection", False, f"Exception: {e}")
            return False

    def test_fan_control(self) -> bool:
        """Exercise fan profiles, profile switching and manual PWM control.

        Returns:
            ``True`` only when the controller initializes and every
            sub-check that ran has passed.
        """
        try:
            self.fan_controller = FanController()
            if not self.fan_controller.initialize():
                self.log_test("Fan Control", False, "Failed to initialize fan controller")
                return False

            all_passed = True

            profiles = self.fan_controller.get_profiles()
            self.log_test(
                "Fan Profiles",
                True,
                f"Loaded {len(profiles)} profiles: {', '.join(profiles.keys())}"
            )

            if "balanced" in profiles:
                switched = bool(self.fan_controller.set_profile("balanced"))
                self.log_test(
                    "Profile Switching",
                    switched,
                    "Switched to balanced profile" if switched
                    else "Failed to switch to balanced profile"
                )
                all_passed = all_passed and switched

            # NOTE(review): the fan is left under manual PWM after this
            # test; restoring automatic control needs a FanController call
            # that is not visible from this file - confirm and add it.
            self.fan_controller.set_manual_pwm(100)
            time.sleep(1)  # Allow time for change

            status = self.fan_controller.get_status()
            manual_ok = bool(status and status.manual_override)
            if manual_ok:
                self.log_test("Manual Control", True, "Manual PWM control working")
            else:
                self.log_test("Manual Control", False, "Manual control not working")
            all_passed = all_passed and manual_ok

            return all_passed

        except Exception as e:
            self.log_test("Fan Control", False, f"Exception: {e}")
            return False

    def test_alert_system(self) -> bool:
        """Register an alert threshold and read back the alert history.

        Returns:
            ``True`` when both calls complete without raising.
        """
        try:
            self.alert_manager = AlertManager()

            threshold = AlertThreshold(
                metric="temperature",
                threshold=50.0,
                operator=">=",
                duration=5,
                enabled=True,
                cooldown=60
            )
            self.alert_manager.add_threshold(threshold)
            self.log_test("Alert Thresholds", True, "Successfully added alert threshold")

            history = self.alert_manager.get_alert_history(1)
            self.log_test("Alert History", True, f"Retrieved {len(history)} alerts from history")

            return True

        except Exception as e:
            self.log_test("Alert System", False, f"Exception: {e}")
            return False

    def test_performance_optimizer(self) -> bool:
        """Exercise optimizer profiles, profile application and analytics.

        Returns:
            ``True`` only when every sub-check that ran has passed.
        """
        try:
            self.optimizer = SystemOptimizer()
            all_passed = True

            profiles = self.optimizer.profiles
            self.log_test(
                "Optimization Profiles",
                True,
                f"Loaded {len(profiles)} profiles: {', '.join(profiles.keys())}"
            )

            if "balanced" in profiles:
                applied = bool(self.optimizer.apply_profile("balanced"))
                self.log_test(
                    "Profile Application",
                    applied,
                    "Applied balanced profile" if applied
                    else "Failed to apply balanced profile"
                )
                all_passed = all_passed and applied

            analytics = self.optimizer.get_performance_analytics(1)
            has_system = "system" in analytics
            if has_system:
                self.log_test("Performance Analytics", True, "Successfully generated analytics")
            else:
                self.log_test("Performance Analytics", False, "Failed to generate analytics")
            all_passed = all_passed and has_system

            return all_passed

        except Exception as e:
            self.log_test("Performance Optimizer", False, f"Exception: {e}")
            return False

    def test_web_interface(self) -> bool:
        """Issue a GET to each entry of ``API_ENDPOINTS`` and expect 200.

        Returns:
            ``True`` only when every endpoint answered with status 200.
        """
        try:
            all_passed = True
            with web_app.test_client() as client:
                for label, url, description in self.API_ENDPOINTS:
                    response = client.get(url)
                    if response.status_code == 200:
                        self.log_test(f"Web API - {label}", True, f"{description} working")
                    else:
                        self.log_test(
                            f"Web API - {label}",
                            False,
                            f"{description} failed: {response.status_code}"
                        )
                        all_passed = False

            return all_passed

        except Exception as e:
            self.log_test("Web Interface", False, f"Exception: {e}")
            return False

    def test_data_persistence(self) -> bool:
        """Check that historical data can be retrieved for every GPU.

        A GPU manager is created on demand when :meth:`test_gpu_detection`
        has not been run (or failed before creating one).

        Returns:
            ``True`` when status is available and history was retrieved for
            each listed GPU; ``False`` when no status could be collected or
            an exception was raised.
        """
        try:
            if not self.gpu_manager:
                self.gpu_manager = GPUManager()
                self.gpu_manager.initialize()

            status = self.gpu_manager.get_status()
            if not status:
                # Without this branch the test would pass silently without
                # having checked anything.
                self.log_test("Data Persistence", False, "No status data collected")
                return False

            for gpu_name in self.gpu_manager.get_gpu_list():
                history = self.gpu_manager.get_historical_data(gpu_name, 1)
                self.log_test(
                    f"Data Persistence - {gpu_name}",
                    True,
                    f"Retrieved {len(history)} historical records"
                )

            return True

        except Exception as e:
            self.log_test("Data Persistence", False, f"Exception: {e}")
            return False

    def test_system_integration(self) -> bool:
        """Verify that at least three components were brought up earlier.

        This relies on the component attributes populated by the other
        ``test_*`` methods, so it is meaningful only after they have run.

        Returns:
            ``True`` when three or more components report as working.
        """
        try:
            components_working = []

            if self.gpu_manager and self.gpu_manager.get_gpu_list():
                components_working.append("GPU Manager")

            if self.fan_controller and self.fan_controller.get_profiles():
                components_working.append("Fan Controller")

            if self.alert_manager:
                components_working.append("Alert Manager")

            if self.optimizer and self.optimizer.profiles:
                components_working.append("Performance Optimizer")

            # At least 3 of the 4 components should work.
            success = len(components_working) >= 3
            self.log_test(
                "System Integration",
                success,
                f"Working components: {', '.join(components_working)}"
            )
            return success

        except Exception as e:
            self.log_test("System Integration", False, f"Exception: {e}")
            return False

    def test_configuration_files(self) -> bool:
        """Check that every file in ``CONFIG_FILES`` exists and is valid JSON.

        Each missing, unreadable or malformed file is logged individually
        and does not stop the remaining files from being checked.

        Returns:
            ``True`` when all configuration files loaded successfully.
        """
        try:
            config_files = list(self.CONFIG_FILES)

            valid_configs = 0
            for config_file in config_files:
                if not Path(config_file).exists():
                    self.log_test(f"Config File - {config_file}", False, "File not found")
                    continue
                try:
                    with open(config_file, 'r', encoding='utf-8') as f:
                        json.load(f)
                except ValueError:
                    # Covers json.JSONDecodeError and UnicodeDecodeError,
                    # both of which derive from ValueError.
                    self.log_test(f"Config Validation - {config_file}", False, "Invalid JSON")
                except OSError as e:
                    self.log_test(f"Config File - {config_file}", False, f"Unreadable: {e}")
                else:
                    valid_configs += 1

            success = valid_configs == len(config_files)
            self.log_test(
                "Configuration Files",
                success,
                f"Valid configs: {valid_configs}/{len(config_files)}"
            )
            return success

        except Exception as e:
            self.log_test("Configuration Files", False, f"Exception: {e}")
            return False

    def test_permissions(self) -> bool:
        """Check read access to the hwmon temperature sensor of AMD GPUs.

        Scans ``/sys/class/drm/card*`` for devices whose ``vendor`` file
        matches ``AMD_VENDOR_ID`` and tries to read ``temp1_input`` from
        the first hwmon directory of each.

        Returns:
            ``True`` when at least one sensor could be read; ``False`` when
            no AMD GPU was found or none of the sensors was readable.
        """
        try:
            gpu_paths = []
            for card_path in Path("/sys/class/drm").glob("card*"):
                device_path = card_path / "device"
                vendor_file = device_path / "vendor"
                if not vendor_file.exists():
                    continue
                with open(vendor_file, 'r') as f:
                    if f.read().strip() == self.AMD_VENDOR_ID:
                        gpu_paths.append(str(device_path))

            if not gpu_paths:
                self.log_test("Permissions", False, "No AMD GPUs detected")
                return False

            hwmon_accessible = 0
            for device_path in gpu_paths:
                hwmon_path = Path(device_path) / "hwmon"
                if not hwmon_path.exists():
                    continue
                hwmons = list(hwmon_path.glob("*"))
                if not hwmons:
                    continue
                temp_file = hwmons[0] / "temp1_input"
                if not temp_file.exists():
                    continue
                try:
                    with open(temp_file, 'r') as f:
                        f.read()
                    hwmon_accessible += 1
                except PermissionError:
                    # Counted as inaccessible; reported in the summary line.
                    pass

            success = hwmon_accessible > 0
            self.log_test(
                "Permissions",
                success,
                f"Accessible hwmon devices: {hwmon_accessible}/{len(gpu_paths)}"
            )
            return success

        except Exception as e:
            self.log_test("Permissions", False, f"Exception: {e}")
            return False

    def run_all_tests(self) -> Dict[str, Any]:
        """Run every test, persist the results and return the summary.

        Returns:
            A dict with the keys ``total_tests``, ``passed``, ``failed``,
            ``success_rate`` (percentage), ``test_results``, ``timestamp``
            and ``system_info``.
        """
        banner = "=" * 60
        logger.info(banner)
        logger.info("GPU Monitoring System - Comprehensive Test Suite")
        logger.info(banner)

        # Order matters: System Integration inspects the components that
        # the earlier tests create.
        tests = [
            ("Configuration Files", self.test_configuration_files),
            ("Permissions", self.test_permissions),
            ("GPU Detection", self.test_gpu_detection),
            ("Fan Control", self.test_fan_control),
            ("Alert System", self.test_alert_system),
            ("Performance Optimizer", self.test_performance_optimizer),
            ("Web Interface", self.test_web_interface),
            ("Data Persistence", self.test_data_persistence),
            ("System Integration", self.test_system_integration),
        ]

        for test_name, test_func in tests:
            logger.info(f"\nRunning: {test_name}")
            logger.info("-" * 40)
            try:
                test_func()
            except Exception as e:
                self.log_test(test_name, False, f"Test framework error: {e}")

        total_tests = len(self.test_results)
        passed_tests = sum(1 for result in self.test_results if result['success'])
        failed_tests = total_tests - passed_tests

        summary = {
            "total_tests": total_tests,
            "passed": passed_tests,
            "failed": failed_tests,
            "success_rate": (passed_tests / total_tests * 100) if total_tests > 0 else 0,
            "test_results": self.test_results,
            "timestamp": time.time(),
            "system_info": self.get_system_info(),
        }

        self.save_test_results(summary)

        logger.info("\n" + banner)
        logger.info("TEST SUMMARY")
        logger.info(banner)
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {failed_tests}")
        logger.info(f"Success Rate: {summary['success_rate']:.1f}%")

        if failed_tests > 0:
            logger.info("\nFailed Tests:")
            for result in self.test_results:
                if not result['success']:
                    logger.info(f"  - {result['test']}: {result['message']}")

        logger.info(banner)

        return summary

    def get_system_info(self) -> Dict[str, Any]:
        """Collect basic host information to accompany the test results.

        Returns:
            A dict that always contains ``platform`` and
            ``python_version``.  When ``psutil`` can be imported and
            queried it also contains ``cpu_count``, ``memory_total`` and
            ``disk_total`` (both in whole GiB) and ``uptime`` in seconds.
        """
        try:
            import psutil

            return {
                "platform": sys.platform,
                "python_version": sys.version,
                "cpu_count": psutil.cpu_count(),
                "memory_total": psutil.virtual_memory().total // (1024**3),  # GB
                "disk_total": psutil.disk_usage('/').total // (1024**3),  # GB
                "uptime": time.time() - psutil.boot_time(),
            }
        except Exception:
            # psutil is optional; fall back to what the stdlib provides.
            # A bare ``except`` would also trap KeyboardInterrupt/SystemExit.
            return {
                "platform": sys.platform,
                "python_version": sys.version,
            }

    def save_test_results(self, summary: Dict[str, Any]):
        """Write the summary to a JSON file and a plain-text report.

        Produces ``test_results_detailed.json`` and ``test_summary.txt`` in
        the current working directory.  Failures are logged, not raised.

        Args:
            summary: The dict built by :meth:`run_all_tests`.
        """
        try:
            with open('test_results_detailed.json', 'w', encoding='utf-8') as f:
                # default=str stringifies any value json cannot encode.
                json.dump(summary, f, indent=2, default=str)

            report_lines = [
                "GPU Monitoring System Test Results\n",
                "====================================\n",
                f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}\n",
                f"Total Tests: {summary['total_tests']}\n",
                f"Passed: {summary['passed']}\n",
                f"Failed: {summary['failed']}\n",
                f"Success Rate: {summary['success_rate']:.1f}%\n\n",
                "Detailed Results:\n",
                "================\n",
            ]
            for result in summary['test_results']:
                status = "PASS" if result['success'] else "FAIL"
                report_lines.append(f"{status}: {result['test']} - {result['message']}\n")

            with open('test_summary.txt', 'w', encoding='utf-8') as f:
                f.writelines(report_lines)

            logger.info("Test results saved to test_results_detailed.json and test_summary.txt")

        except Exception as e:
            logger.error(f"Failed to save test results: {e}")
479
+
480
+
481
def main():
    """Print a banner, run the full test suite and exit with its status.

    The process exits with code 1 when at least one recorded check failed
    and with code 0 otherwise.
    """
    print("GPU Monitoring System - Comprehensive Test Suite")
    print("=" * 60)
    print("This test suite will validate all components of the")
    print("GPU monitoring and fan control system.")
    print()

    # os.geteuid only exists on POSIX platforms; skip the root check
    # elsewhere instead of crashing before any test has run.
    geteuid = getattr(os, "geteuid", None)
    if geteuid is not None and geteuid() != 0:
        print("WARNING: Not running as root. Some tests may fail due to permission issues.")
        print("For complete testing, run: sudo python3 test_system.py")
        print()

    tester = SystemTester()
    results = tester.run_all_tests()

    if results['failed'] > 0:
        print("\nSome tests failed. Please review the results and fix any issues.")
        sys.exit(1)

    print("\nAll tests passed! The system is ready for use.")
    sys.exit(0)


if __name__ == "__main__":
    main()