neural-mesh / test /test_integrated_training.py
hjkim00's picture
Upload TestTime-RLVR-v2 from Full-pipeline-relative_0827 branch
f50dc54 verified
#!/usr/bin/env python3
"""
Integrated training system test.

Verifies that train_ttrlvr_azr.py works correctly.
"""
import os
import sys
import subprocess
import time
# Environment variables for the test run. They are written to os.environ at
# import time, before any of the test functions below start a child process.
os.environ['CUDA_VISIBLE_DEVICES'] = '5'
os.environ['VLLM_ATTENTION_BACKEND'] = 'FLASH_ATTN'
os.environ['RAY_memory_monitor_refresh_ms'] = '0'
os.environ['RAY_LOGGING_LEVEL'] = 'DEBUG'
os.environ['HYDRA_FULL_ERROR'] = '1'

# Append the project roots to PYTHONPATH. The existing value is only kept
# when it is non-empty, so an unset/empty PYTHONPATH no longer produces a
# value with a leading separator (i.e. an empty path entry).
_project_paths = ['/home/ubuntu/RLVR/verl', '/home/ubuntu/RLVR/TestTime-RLVR-v2']
_existing_pythonpath = os.environ.get('PYTHONPATH', '')
os.environ['PYTHONPATH'] = os.pathsep.join(
    ([_existing_pythonpath] if _existing_pythonpath else []) + _project_paths
)
def test_small_run():
    """Run a reduced end-to-end training job and report how it went.

    Starts train_ttrlvr_azr.py in a child process with a small
    configuration, prints the command line, a few environment variables,
    the elapsed time and the exit code, and calls check_generated_data()
    when the child exits with code 0. Any exception raised while running
    or reporting is caught and printed instead of propagating.
    """
    banner = "=" * 80
    print(banner)
    print("TTRLVR + AZR ํ†ตํ•ฉ ํ•™์Šต ํ…Œ์ŠคํŠธ")
    print(banner)
    print()

    # Test parameters: deliberately small so the run finishes quickly.
    script = "/home/ubuntu/RLVR/TestTime-RLVR-v2/test/train_ttrlvr_azr.py"
    options = [
        "--benchmark", "mbpp",
        "--problems", "2",      # test only two problems
        "--rounds", "2",        # test only two rounds
        "--gpu", "5",
        "--batch-size", "1",    # save memory
        "--model", "Qwen/Qwen2.5-7B",
        "--debug",              # debug mode
    ]
    cmd = [sys.executable, script, *options]

    print(f"๋ช…๋ น์–ด: {' '.join(cmd)}")
    print()
    print("ํ™˜๊ฒฝ ๋ณ€์ˆ˜:")
    reported_vars = ('CUDA_VISIBLE_DEVICES', 'VLLM_ATTENTION_BACKEND', 'PYTHONPATH')
    for key in reported_vars:
        print(f" {key}: {os.environ.get(key, 'NOT SET')}")
    print()

    start_time = time.time()
    try:
        # Output is not captured, so the child's output is visible live.
        result = subprocess.run(cmd, capture_output=False, text=True)
        elapsed = time.time() - start_time

        print()
        print(banner)
        print(f"ํ…Œ์ŠคํŠธ ์™„๋ฃŒ (์†Œ์š” ์‹œ๊ฐ„: {elapsed:.1f}์ดˆ)")
        print(f"์ข…๋ฃŒ ์ฝ”๋“œ: {result.returncode}")
        print(banner)

        if result.returncode != 0:
            print("โŒ ์‹คํŒจ!")
        else:
            print("โœ… ์„ฑ๊ณต!")
            # Inspect the data the run produced.
            check_generated_data()
    except Exception as e:
        print(f"๐Ÿ’ฅ ์˜ˆ์™ธ ๋ฐœ์ƒ: {e}")
def check_generated_data(round1_path="/tmp/ttrlvr_azr_training/round_1",
                         checkpoint_base="/data/RLVR/checkpoints/ttrlvr_azr"):
    """Print a summary of the files and checkpoints found on disk.

    Args:
        round1_path: Directory with the first round's generated data. Each
            regular file in it is listed with its size; for ``.parquet``
            files the row count is printed as well.
        checkpoint_base: Directory whose sub-directories are listed
            together with the entries they contain.

    Returns:
        None. Everything is reported through ``print``.

    A path that is missing or is not a directory is skipped. A parquet file
    that cannot be read (pandas or a parquet engine unavailable, corrupt
    file, ...) is reported with a short message and does not abort the
    listing. Directory entries are printed in sorted order.
    """
    print("\n์ƒ์„ฑ๋œ ๋ฐ์ดํ„ฐ ํ™•์ธ:")

    # First-round data.
    if os.path.isdir(round1_path):
        print(f"\n๐Ÿ“ {round1_path}:")
        for name in sorted(os.listdir(round1_path)):
            file_path = os.path.join(round1_path, name)
            if not os.path.isfile(file_path):
                continue
            size = os.path.getsize(file_path)
            print(f" - {name}: {size:,} bytes")
            if name.endswith('.parquet'):
                try:
                    # Imported lazily: pandas is only needed for this
                    # optional row-count report.
                    import pandas as pd
                    df = pd.read_parquet(file_path)
                except Exception as exc:
                    # Best effort: report the problem and keep listing.
                    print(f" (could not read parquet: {exc})")
                else:
                    print(f" ํ–‰ ์ˆ˜: {len(df)}")

    # Checkpoints.
    if os.path.isdir(checkpoint_base):
        print(f"\n๐Ÿ“ ์ฒดํฌํฌ์ธํŠธ ๋””๋ ‰ํ† ๋ฆฌ: {checkpoint_base}")
        for exp_dir in sorted(os.listdir(checkpoint_base)):
            exp_path = os.path.join(checkpoint_base, exp_dir)
            if os.path.isdir(exp_path):
                print(f" - {exp_dir}/")
                for checkpoint in sorted(os.listdir(exp_path)):
                    print(f" - {checkpoint}")
def test_subprocess_azr():
    """Exercise only the subprocess-based AZR training call.

    When the expected training-data directory does not exist, a notice is
    printed and nothing else happens. Otherwise run_azr_training is
    imported from utils.run_azr_subprocess, called on that directory, and
    the truthiness of its return value is reported.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Subprocess AZR ํ˜ธ์ถœ ํ…Œ์ŠคํŠธ")
    print(rule)

    # Training data left behind by an earlier run; required for this test.
    test_data_path = (
        "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/"
        "ttrlvr_azr_20250730_155352/mbpp/Mbpp_7/round_1/azr_training_data"
    )

    if os.path.exists(test_data_path):
        # Make the project's test directory importable before the import.
        sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
        from utils.run_azr_subprocess import run_azr_training

        success = run_azr_training(
            training_data_path=test_data_path,
            experiment_name="test_subprocess",
            gpu_id=5,
        )
        print(f"\nSubprocess ๊ฒฐ๊ณผ: {'์„ฑ๊ณต' if success else '์‹คํŒจ'}")
    else:
        print(f"ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ ์—†์Œ: {test_data_path}")
if __name__ == "__main__":
    # Command-line entry point: --mode picks which of the two tests to run
    # ('full' is the default).
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        choices=['full', 'subprocess'],
        default='full',
        help='ํ…Œ์ŠคํŠธ ๋ชจ๋“œ ์„ ํƒ',
    )
    args = parser.parse_args()

    # 'full' runs the whole pipeline; anything else runs the subprocess test.
    selected_test = test_small_run if args.mode == 'full' else test_subprocess_azr
    selected_test()