|
|
|
""" |
|
ํตํฉ ํ์ต ์์คํ
ํ
์คํธ |
|
train_ttrlvr_azr.py๊ฐ ์ฌ๋ฐ๋ฅด๊ฒ ์๋ํ๋์ง ํ์ธ |
|
""" |
|
|
|
import os |
|
import sys |
|
import subprocess |
|
import time |
|
|
|
|
|
# Environment configuration applied at import time. Child processes started
# later with subprocess.run (without an explicit env) inherit these values.
os.environ['CUDA_VISIBLE_DEVICES'] = '5'
os.environ['VLLM_ATTENTION_BACKEND'] = 'FLASH_ATTN'
os.environ['RAY_memory_monitor_refresh_ms'] = '0'
os.environ['RAY_LOGGING_LEVEL'] = 'DEBUG'
os.environ['HYDRA_FULL_ERROR'] = '1'

# Append the project roots to PYTHONPATH. The previous value is kept verbatim
# when present; when it is unset/empty we must not emit a leading separator,
# because that creates an empty path entry (resolved by the interpreter as the
# current working directory).
_existing_pythonpath = os.environ.get('PYTHONPATH', '')
os.environ['PYTHONPATH'] = os.pathsep.join(
    ([_existing_pythonpath] if _existing_pythonpath else [])
    + ['/home/ubuntu/RLVR/verl', '/home/ubuntu/RLVR/TestTime-RLVR-v2']
)
|
|
|
|
|
def test_small_run():
    """Run a small end-to-end integration test of train_ttrlvr_azr.py.

    Launches the training script in a child process with a tiny configuration
    (mbpp benchmark, 2 problems, 2 rounds, GPU 5, batch size 1, debug flag),
    letting the child's output go straight to this process's stdout/stderr.
    Afterwards prints the elapsed time and exit code; on exit code 0 it also
    calls ``check_generated_data`` to list what was produced.

    Any exception raised while running or inspecting the child is printed
    instead of propagated. Returns ``None``.
    """
    banner = "=" * 80
    print(banner)
    print("TTRLVR + AZR 통합 학습 테스트")
    print(banner)
    print()

    cmd = [
        sys.executable,
        "/home/ubuntu/RLVR/TestTime-RLVR-v2/test/train_ttrlvr_azr.py",
        "--benchmark", "mbpp",
        "--problems", "2",
        "--rounds", "2",
        "--gpu", "5",
        "--batch-size", "1",
        "--model", "Qwen/Qwen2.5-7B",
        "--debug",
    ]

    print(f"명령어: {' '.join(cmd)}")
    print()
    print("환경 변수:")
    for key in ('CUDA_VISIBLE_DEVICES', 'VLLM_ATTENTION_BACKEND', 'PYTHONPATH'):
        print(f" {key}: {os.environ.get(key, 'NOT SET')}")
    # Flush before spawning: the child writes directly to the inherited file
    # descriptors, so buffered parent output would otherwise appear after it
    # when stdout is redirected to a file or pipe.
    print(flush=True)

    # Monotonic clock: elapsed time must not be affected by wall-clock changes.
    start_time = time.monotonic()

    try:
        # Output is deliberately not captured so training logs stream live.
        result = subprocess.run(
            cmd,
            capture_output=False,
            text=True,
        )

        elapsed = time.monotonic() - start_time

        print()
        print(banner)
        print(f"테스트 완료 (소요 시간: {elapsed:.1f}초)")
        print(f"종료 코드: {result.returncode}")
        print(banner)

        if result.returncode == 0:
            print("✅ 성공!")
            check_generated_data()
        else:
            print("❌ 실패!")

    except Exception as e:
        # Top-level boundary of the script: report and continue.
        print(f"💥 예외 발생: {e}")
|
|
|
|
|
def _report_parquet_rows(file_path):
    """Print the row count of a parquet file, or a notice if it cannot be read."""
    try:
        # Imported lazily so the listing still works without pandas installed.
        import pandas as pd

        df = pd.read_parquet(file_path)
    except Exception as exc:
        # Best-effort: a missing engine or corrupt file must not abort the
        # listing, but the failure should be visible rather than swallowed.
        print(f" 행 수 확인 실패: {exc}")
    else:
        print(f" 행 수: {len(df)}")


def check_generated_data(
    round1_path="/tmp/ttrlvr_azr_training/round_1",
    checkpoint_base="/data/RLVR/checkpoints/ttrlvr_azr",
):
    """Print a summary of the data and checkpoints produced by a run.

    Args:
        round1_path: Directory whose regular files are listed with their
            sizes; ``.parquet`` files additionally get their row count.
        checkpoint_base: Directory whose sub-directories are listed together
            with the entries each one contains.

    A path that is missing or is not a directory is skipped silently.
    Listings are sorted so the output is deterministic. Returns ``None``.
    """
    print("\n생성된 데이터 확인:")

    # NOTE(review): the folder emoji in the two headers below was
    # reconstructed from a garbled view of the source - confirm the glyph.
    if os.path.isdir(round1_path):
        print(f"\n📁 {round1_path}:")
        for file in sorted(os.listdir(round1_path)):
            file_path = os.path.join(round1_path, file)
            if not os.path.isfile(file_path):
                continue
            size = os.path.getsize(file_path)
            print(f" - {file}: {size:,} bytes")
            if file.endswith('.parquet'):
                _report_parquet_rows(file_path)

    if os.path.isdir(checkpoint_base):
        print(f"\n📁 체크포인트 디렉토리: {checkpoint_base}")
        for exp_dir in sorted(os.listdir(checkpoint_base)):
            exp_path = os.path.join(checkpoint_base, exp_dir)
            if not os.path.isdir(exp_path):
                continue
            print(f" - {exp_dir}/")
            for checkpoint in sorted(os.listdir(exp_path)):
                print(f" - {checkpoint}")
|
|
|
|
|
def test_subprocess_azr():
    """Exercise only the subprocess-based AZR training call.

    Looks for a fixed, previously generated training-data directory. If it
    does not exist, a notice is printed and the function returns early.
    Otherwise ``run_azr_training`` (from ``utils.run_azr_subprocess``) is
    invoked on it with experiment name ``"test_subprocess"`` and ``gpu_id=5``,
    and the truthiness of its result is reported as success or failure.

    Returns ``None``.
    """
    print("\n" + "=" * 80)
    print("Subprocess AZR 호출 테스트")
    print("=" * 80)

    test_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250730_155352/mbpp/Mbpp_7/round_1/azr_training_data"

    if not os.path.exists(test_data_path):
        print(f"테스트 데이터 없음: {test_data_path}")
        return

    # Make the helper package importable; skip the append when the entry is
    # already present so repeated calls do not grow sys.path.
    helper_dir = '/home/ubuntu/RLVR/TestTime-RLVR-v2/test'
    if helper_dir not in sys.path:
        sys.path.append(helper_dir)
    from utils.run_azr_subprocess import run_azr_training

    success = run_azr_training(
        training_data_path=test_data_path,
        experiment_name="test_subprocess",
        gpu_id=5,
    )

    print(f"\nSubprocess 결과: {'성공' if success else '실패'}")
|
|
|
|
|
def main(argv=None):
    """Parse the command line and run the selected test.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    ``--mode full`` (the default) runs ``test_small_run``; ``--mode
    subprocess`` runs ``test_subprocess_azr``. An invalid mode makes argparse
    exit with ``SystemExit`` before either test is started.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['full', 'subprocess'], default='full',
                        help='테스트 모드 선택')

    args = parser.parse_args(argv)

    if args.mode == 'full':
        test_small_run()
    else:
        test_subprocess_azr()


if __name__ == "__main__":
    main()