|
|
|
""" |
|
TTRLVR + AZR ํ๊ฒฝ ๊ฒ์ฆ ์คํฌ๋ฆฝํธ |
|
|
|
์ค์ ์คํ ํ๊ฒฝ์์ ํ์ํ ๋ชจ๋ ์ปดํฌ๋ํธ๊ฐ ์ฌ๋ฐ๋ฅด๊ฒ ์ค์ ๋์ด ์๋์ง ํ์ธํฉ๋๋ค: |
|
1. Python ํจํค์ง ๋ฐ ๋ฒ์ ํ์ธ |
|
2. GPU ๋ฐ CUDA ํ๊ฒฝ ํ์ธ |
|
3. ํ์ผ ๊ฒฝ๋ก ๋ฐ ๊ถํ ํ์ธ |
|
4. ๋ชจ๋ธ ๋ก๋ฉ ํ
์คํธ |
|
5. AZR ์ค์ ํ์ผ ๊ฒ์ฆ |
|
6. ๊ฐ๋จํ ํ์ดํ๋ผ์ธ ์คํ ํ
์คํธ |
|
""" |
|
|
|
import os
import sys
import json
import importlib
import traceback

from datetime import datetime
from typing import Optional
|
|
|
|
|
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2') |
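

# Minimal helper (a sketch added here, not part of the original script): plain
# string comparison mis-orders version numbers such as '4.9.0' vs '4.30.0', so
# versions are compared as tuples of ints instead. For full PEP 440 semantics,
# prefer packaging.version.parse.
def _parse_version(version: str) -> tuple:
    """Parse a dotted version string into a tuple of ints for comparison.

    Truncates at the first non-numeric component, so '2.1.0rc1' parses as
    (2, 1, 0).
    """
    parts = []
    for part in version.split('.'):
        digits = ''
        for ch in part:
            if not ch.isdigit():
                break
            digits += ch
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)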
|
|
|
class EnvironmentValidator: |
|
"""ํ๊ฒฝ ๊ฒ์ฆ ํด๋์ค""" |
|
|
|
def __init__(self): |
|
self.results = { |
|
'timestamp': datetime.now().isoformat(), |
|
'tests': {}, |
|
'overall_success': False, |
|
'recommendations': [] |
|
} |
|
|
|
    def log_test(self, test_name: str, success: bool, message: str, details: Optional[str] = None):
        """Log a single test result and record it in self.results."""
|
|
|
status = "โ
PASS" if success else "โ FAIL" |
|
print(f"{status} {test_name}: {message}") |
|
|
|
if details: |
|
print(f" Details: {details}") |
|
|
|
self.results['tests'][test_name] = { |
|
'success': success, |
|
'message': message, |
|
'details': details |
|
} |
|
|
|
if not success: |
|
print() |
|
|
|
def add_recommendation(self, recommendation: str): |
|
"""๊ถ์ฅ์ฌํญ ์ถ๊ฐ""" |
|
self.results['recommendations'].append(recommendation) |
|
print(f"๐ก Recommendation: {recommendation}") |
|
|
|
def test_python_packages(self): |
|
"""Python ํจํค์ง ํ์ธ""" |
|
|
|
required_packages = { |
|
'torch': '2.0.0', |
|
'transformers': '4.30.0', |
|
'pandas': '1.5.0', |
|
'numpy': '1.21.0', |
|
'vllm': '0.3.0' |
|
} |
|
|
|
missing_packages = [] |
|
version_issues = [] |
|
|
|
        for package, min_version in required_packages.items():
            try:
                if package == 'vllm':
                    # vLLM is optional: recommend it instead of failing the check.
                    try:
                        import vllm
                        version = vllm.__version__
                    except ImportError:
                        self.add_recommendation("Consider installing vLLM for better GPU performance: pip install vllm")
                        continue
                else:
                    # importlib.import_module is safer and clearer than the
                    # exec/eval pair it replaces.
                    module = importlib.import_module(package)
                    version = getattr(module, '__version__', '0')

                # Compare parsed version tuples, not raw strings (see
                # _parse_version above).
                if _parse_version(version) < _parse_version(min_version):
                    version_issues.append(f"{package}: {version} < {min_version}")

            except ImportError:
                missing_packages.append(package)
            except Exception as e:
                version_issues.append(f"{package}: Error checking version - {e}")
|
|
|
if missing_packages: |
|
self.log_test( |
|
"Python Packages", |
|
False, |
|
f"Missing packages: {', '.join(missing_packages)}", |
|
f"Install with: pip install {' '.join(missing_packages)}" |
|
) |
|
return False |
|
elif version_issues: |
|
self.log_test( |
|
"Python Packages", |
|
False, |
|
f"Version issues: {', '.join(version_issues)}", |
|
"Update packages to meet minimum requirements" |
|
) |
|
return False |
|
else: |
|
self.log_test("Python Packages", True, "All required packages installed") |
|
return True |
|
|
|
def test_gpu_environment(self): |
|
"""GPU ๋ฐ CUDA ํ๊ฒฝ ํ์ธ""" |
|
|
|
try: |
|
import torch |
|
|
|
|
|
if not torch.cuda.is_available(): |
|
self.log_test("GPU Environment", False, "CUDA not available") |
|
self.add_recommendation("Install CUDA toolkit and PyTorch with CUDA support") |
|
return False |
|
|
|
|
|
gpu_count = torch.cuda.device_count() |
|
current_device = torch.cuda.current_device() |
|
device_name = torch.cuda.get_device_name(current_device) |
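
            # Memory figures are converted from bytes to GiB (1024**3 bytes).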
|
|
|
|
|
memory_allocated = torch.cuda.memory_allocated() / 1024**3 |
|
memory_reserved = torch.cuda.memory_reserved() / 1024**3 |
|
memory_total = torch.cuda.get_device_properties(current_device).total_memory / 1024**3 |
|
|
|
details = f"GPUs: {gpu_count}, Current: {device_name}, Memory: {memory_total:.1f}GB total, {memory_reserved:.1f}GB reserved" |
|
|
|
if memory_total < 8.0: |
|
self.log_test("GPU Environment", False, f"GPU memory insufficient: {memory_total:.1f}GB", details) |
|
self.add_recommendation("Use a GPU with at least 8GB VRAM for 7B models") |
|
return False |
|
|
|
self.log_test("GPU Environment", True, f"GPU environment ready", details) |
|
return True |
|
|
|
except Exception as e: |
|
self.log_test("GPU Environment", False, f"Error checking GPU: {e}") |
|
return False |
|
|
|
def test_file_paths_and_permissions(self): |
|
"""ํ์ผ ๊ฒฝ๋ก ๋ฐ ๊ถํ ํ์ธ""" |
|
|
|
critical_paths = { |
|
'/home/ubuntu/RLVR/TestTime-RLVR-v2': 'Main project directory', |
|
'/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_7b_single_gpu.sh': 'AZR config script', |
|
'/data/RLVR/checkpoints': 'Checkpoint directory (will be created)', |
|
'/tmp': 'Temporary directory' |
|
} |
|
|
|
issues = [] |
|
|
|
for path, description in critical_paths.items(): |
|
if not os.path.exists(path): |
|
if 'checkpoints' in path: |
|
|
|
try: |
|
os.makedirs(path, exist_ok=True) |
|
self.log_test(f"Path: {description}", True, f"Created directory: {path}") |
|
except Exception as e: |
|
issues.append(f"{description}: Cannot create {path} - {e}") |
|
else: |
|
issues.append(f"{description}: Not found - {path}") |
|
else: |
|
|
|
readable = os.access(path, os.R_OK) |
|
writable = os.access(path, os.W_OK) |
|
|
|
if not readable: |
|
issues.append(f"{description}: No read permission - {path}") |
|
elif os.path.isdir(path) and not writable: |
|
issues.append(f"{description}: No write permission - {path}") |
|
else: |
|
self.log_test(f"Path: {description}", True, f"Accessible: {path}") |
|
|
|
if issues: |
|
self.log_test("File Paths", False, f"{len(issues)} path issues", "; ".join(issues)) |
|
return False |
|
else: |
|
self.log_test("File Paths", True, "All critical paths accessible") |
|
return True |
|
|
|
def test_model_loading(self): |
|
"""๋ชจ๋ธ ๋ก๋ฉ ํ
์คํธ (๊ฐ๋จํ ํ์ธ)""" |
|
|
|
try: |
|
|
|
from transformers import AutoTokenizer |
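
            # The import itself is the check; AutoTokenizer is intentionally
            # left unused so no model weights are downloaded here.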
|
|
|
|
|
self.log_test("Model Loading", True, "Transformers library available for model loading") |
|
self.add_recommendation("Model loading test skipped to avoid timeout. Run full model test separately if needed.") |
|
return True |
|
|
|
except Exception as e: |
|
self.log_test("Model Loading", False, f"Failed to import transformers: {e}") |
|
self.add_recommendation("Install transformers library: pip install transformers") |
|
return False |
|
|
|
def test_azr_config(self): |
|
"""AZR ์ค์ ํ์ผ ๊ฒ์ฆ""" |
|
|
|
config_path = '/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_7b_single_gpu.sh' |
|
|
|
try: |
|
if not os.path.exists(config_path): |
|
self.log_test("AZR Config", False, f"Config file not found: {config_path}") |
|
return False |
|
|
|
|
|
if not os.access(config_path, os.X_OK): |
|
self.log_test("AZR Config", False, f"Config file not executable: {config_path}") |
|
self.add_recommendation(f"Make config executable: chmod +x {config_path}") |
|
return False |
|
|
|
|
|
with open(config_path, 'r') as f: |
|
content = f.read() |
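
            # Note: the check below is a naive substring match; a setting that
            # is present but formatted differently (extra whitespace, line
            # continuations) will be reported as missing.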
|
|
|
required_settings = [ |
|
'trainer.project_name=ttrlvr_azr', |
|
'azr.train_propose=False', |
|
'data.train_batch_size=8', |
|
'actor_rollout_ref.actor.ppo_mini_batch_size=24' |
|
] |
|
|
|
missing_settings = [] |
|
for setting in required_settings: |
|
if setting not in content: |
|
missing_settings.append(setting) |
|
|
|
if missing_settings: |
|
self.log_test( |
|
"AZR Config", |
|
False, |
|
f"Missing settings: {', '.join(missing_settings)}", |
|
f"Check config file: {config_path}" |
|
) |
|
return False |
|
|
|
self.log_test("AZR Config", True, f"Config file validated: {config_path}") |
|
return True |
|
|
|
except Exception as e: |
|
self.log_test("AZR Config", False, f"Error validating config: {e}") |
|
return False |
|
|
|
def test_simple_pipeline(self): |
|
"""๊ฐ๋จํ ํ์ดํ๋ผ์ธ ์คํ ํ
์คํธ""" |
|
|
|
try: |
|
from absolute_zero_reasoner.testtime.config import TestTimeConfig |
|
from absolute_zero_reasoner.testtime.logger import TestTimeLogger |
|
from absolute_zero_reasoner.testtime.task_generator import TestTimeTaskGenerator |
|
|
|
|
|
config = TestTimeConfig() |
|
config.model_name = "Qwen/Qwen2.5-7B" |
|
logger = TestTimeLogger() |
|
|
|
|
|
task_generator = TestTimeTaskGenerator(config, logger) |
|
|
|
|
|
test_ipo_triples = [ |
|
{ |
|
'id': 'test_triple_0', |
|
'input': '[1, 2, 3]', |
|
'actual_output': '[2, 4, 6]', |
|
'program': 'def test_func(lst):\n return [x * 2 for x in lst]', |
|
'full_input_str': 'test_func([1, 2, 3])', |
|
'source_program_id': 'program_0', |
|
'ipo_index': 0 |
|
} |
|
] |
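
            # Based on how the result is consumed below, generate_tasks is
            # expected to return a dict mapping task type to a list of task dicts.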
|
|
|
|
|
tasks = task_generator.generate_tasks(test_ipo_triples, "TestProblem", 1) |
|
|
|
|
|
if not tasks or not any(len(task_list) > 0 for task_list in tasks.values()): |
|
self.log_test("Simple Pipeline", False, "No tasks generated") |
|
return False |
|
|
|
|
|
for task_type, task_list in tasks.items(): |
|
if task_list: |
|
task = task_list[0] |
|
required_fields = ['uid', 'ipo_group_id', 'basic_accuracy', 'ground_truth'] |
|
missing_fields = [field for field in required_fields if field not in task] |
|
|
|
if missing_fields: |
|
self.log_test( |
|
"Simple Pipeline", |
|
False, |
|
f"Missing AZR metadata: {missing_fields}" |
|
) |
|
return False |
|
|
|
total_tasks = sum(len(task_list) for task_list in tasks.values()) |
|
self.log_test("Simple Pipeline", True, f"Generated {total_tasks} tasks successfully") |
|
return True |
|
|
|
except Exception as e: |
|
self.log_test("Simple Pipeline", False, f"Pipeline test failed: {e}") |
|
return False |
|
|
|
def run_all_tests(self): |
|
"""๋ชจ๋ ํ
์คํธ ์คํ""" |
|
|
|
print("๐ TTRLVR + AZR ํ๊ฒฝ ๊ฒ์ฆ ์์") |
|
print("=" * 60) |
|
|
|
tests = [ |
|
self.test_python_packages, |
|
self.test_gpu_environment, |
|
self.test_file_paths_and_permissions, |
|
self.test_model_loading, |
|
self.test_azr_config, |
|
self.test_simple_pipeline |
|
] |
|
|
|
passed_tests = 0 |
|
total_tests = len(tests) |
|
|
|
for test in tests: |
|
try: |
|
if test(): |
|
passed_tests += 1 |
|
print() |
|
except Exception as e: |
|
print(f"โ Test {test.__name__} crashed: {e}") |
|
print(f" Traceback: {traceback.format_exc()}") |
|
print() |
|
|
|
|
|
success_rate = passed_tests / total_tests * 100 |
|
self.results['overall_success'] = passed_tests == total_tests |
|
|
|
print("=" * 60) |
|
print("๐ ํ๊ฒฝ ๊ฒ์ฆ ๊ฒฐ๊ณผ:") |
|
print(f" - ํต๊ณผํ ํ
์คํธ: {passed_tests}/{total_tests} ({success_rate:.1f}%)") |
|
|
|
if self.results['recommendations']: |
|
print(f"\n๐ก ๊ถ์ฅ์ฌํญ ({len(self.results['recommendations'])}๊ฐ):") |
|
for i, rec in enumerate(self.results['recommendations'], 1): |
|
print(f" {i}. {rec}") |
|
|
|
if self.results['overall_success']: |
|
print("\n๐ ํ๊ฒฝ ๊ฒ์ฆ ์๋ฃ! TTRLVR + AZR ์คํ ์ค๋น๊ฐ ์๋ฃ๋์์ต๋๋ค.") |
|
else: |
|
print(f"\nโ ๏ธ ํ๊ฒฝ ๊ฒ์ฆ ์คํจ: {total_tests - passed_tests}๊ฐ ํ
์คํธ ์คํจ") |
|
print(" ์์ ๊ถ์ฅ์ฌํญ์ ์ฐธ๊ณ ํ์ฌ ๋ฌธ์ ๋ฅผ ํด๊ฒฐํ ํ ๋ค์ ์๋ํ์ธ์.") |
|
|
|
return self.results |
|
|
|
|
|
def main(): |
|
"""๋ฉ์ธ ์คํ ํจ์""" |
|
|
|
validator = EnvironmentValidator() |
|
results = validator.run_all_tests() |
|
|
|
|
|
output_file = f"/tmp/ttrlvr_azr_validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" |
|
with open(output_file, 'w') as f: |
|
json.dump(results, f, indent=2) |
|
|
|
print(f"\n๐ ์์ธ ๊ฒฐ๊ณผ ์ ์ฅ: {output_file}") |
|
|
|
return 0 if results['overall_success'] else 1 |
|
|
|
|
|
if __name__ == '__main__': |
|
sys.exit(main()) |