# neural-mesh / test_debug_verl.py
# hjkim00's picture
# Upload TestTime-RLVR-v2 from Full-pipeline-relative_0827 branch
# f50dc54 verified
#!/usr/bin/env python3
"""Test script to debug VeRL training"""
import sys
import os
# Prepend both project directories to the import search path in one step.
# The resulting order matches two successive insert(0, ...) calls: the verl
# directory is searched first, then the TestTime-RLVR-v2 directory.
sys.path[:0] = [
    '/home/ubuntu/RLVR/verl',
    '/home/ubuntu/RLVR/TestTime-RLVR-v2',
]
# Create dummy training data
import pandas as pd
import numpy as np
# Dummy training data is written to <output_dir>/training_data; both names are
# reused further down when the run arguments are assembled.
output_dir = "./test_time_output_debug"
training_data_path = os.path.join(output_dir, "training_data")
os.makedirs(training_data_path, exist_ok=True)

# Create minimal dummy data for each task type: a single-row DataFrame saved
# as <task_type>.parquet inside the training data directory.
for task_type in ['induction', 'deduction', 'abduction']:
    data = {
        'prompts': ['test prompt ' + task_type],
        'responses': ['test response ' + task_type],
        'rewards': [1.0],
        'problem_id': ['test_id'],
        # Dummy scores: one array of ten 1.0 values stored in the single row.
        'token_level_scores': [np.array([1.0] * 10)],
    }
    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(training_data_path, f'{task_type}.parquet'))

# Reported once, after all three files have been written.
print(f"Created dummy training data in {training_data_path}")
# Now run Step 5 only (step5_only=True below), using the project's entry point.
from test.train_ttrlvr_azr import main
import argparse

# Settings handed to main() as a pre-built namespace instead of being parsed
# from the command line. Insertion order is kept so the namespace's attribute
# order is the same as when the values are passed as keyword arguments.
_settings = {
    'benchmark': 'mbpp',
    'problem_id': 'Mbpp/2',
    'rounds': 1,
    'config': 'test/configs/ttrlvr_azr_ppo_4gpu.yaml',
    'step5_only': True,
    'data_path': training_data_path,  # directory holding the dummy parquet files
    'output_dir': output_dir,
    'model': 'Qwen/Qwen2.5-7B',
    'debug': True,
    'batch_size': 24,
    'batch_epochs': 1,
    'num_programs': 4,
    'input_generation_rounds': 3,
    'parallel_batch_size': 4,
    'eval_rounds': 5,
    'skip_task_eval': False,
    'save_every_round': False,
    'save_round_interval': 5,
    'problems': 10,
    'resume': 1,
    'gpu': None,
}
args = argparse.Namespace(**_settings)

# Patch sys.argv for argparse: leave only the script name so no stray
# command-line arguments are visible.
# NOTE(review): presumably something called from main() parses sys.argv - confirm.
sys.argv = ['test_debug_verl.py']

# Run main
main(args)