neural-mesh / test_verl_model_generation.py
hjkim00's picture
Upload TestTime-RLVR-v2 from Full-pipeline-relative_0827 branch
f50dc54 verified
#!/usr/bin/env python3
"""
VeRL model generation test.
Converts the data into a list of message dicts and checks that a sensible response is generated.
"""
import os
import sys
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Opening banner for the run.
print("=" * 80, "VeRL λͺ¨λΈ 생성 ν…ŒμŠ€νŠΈ", "=" * 80, sep="\n")

# Device selection: CUDA when torch reports it as available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

# 1. Load the tokenizer and the model.
model_name = "Qwen/Qwen2.5-7B"
print("\nλͺ¨λΈ λ‘œλ”© 쀑...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
print(f"λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ: {model_name}")
# 2. Load the data.
data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_unified_20250822_151912/mbpp/Mbpp_2/round_1/azr_training_data/induction.parquet"
df = pd.read_parquet(data_path)
print(f"\n데이터 λ‘œλ“œ: {len(df)} μƒ˜ν”Œ")

# 3. Samples to test, collected as (label, chat messages) pairs.
test_samples = []

# Sample 1: the first row of the real data.
prompt_str = df.iloc[0]['prompt']
if isinstance(prompt_str, str):
    # Plain-string prompt: keep only the first 1000 characters so the test
    # stays short, then wrap it in a single user message.
    truncated_prompt = prompt_str[:1000] + "\n\nAssistant:"
    prompt_dict = [{"role": "user", "content": truncated_prompt}]
elif isinstance(prompt_str, (list, tuple, np.ndarray)):
    # pandas hands back list-typed parquet cells as numpy object arrays;
    # convert to a plain Python list so the chat template receives the
    # list-of-dicts format this script is meant to verify.
    # NOTE(review): assumes each element is already a {"role", "content"}
    # dict -- confirm against the parquet schema.
    prompt_dict = list(prompt_str)
else:
    # Unknown cell type: pass it through unchanged, as before.
    prompt_dict = prompt_str
test_samples.append(("μ‹€μ œ 데이터 μƒ˜ν”Œ (truncated)", prompt_dict))

# Sample 2: a simple coding problem.
simple_prompt = [{
    "role": "user",
    "content": "Write a Python function to calculate the factorial of a number."
}]
test_samples.append(("κ°„λ‹¨ν•œ μ½”λ”© 문제", simple_prompt))

# Sample 3: an AZR-style prompt (task description followed by a function header).
azr_style_prompt = [{
    "role": "user",
    "content": """Write a function that takes a list of numbers and returns the sum of all even numbers.
def sum_even_numbers(numbers):"""
}]
test_samples.append(("AZR μŠ€νƒ€μΌ", azr_style_prompt))
# 4. Run a generation test for every sample.
print("\n" + "=" * 80)
print("생성 ν…ŒμŠ€νŠΈ μ‹œμž‘")
print("=" * 80)

# Keyword lists for the rough response-quality check at the end of each
# iteration. They are compared against the lower-cased response, so they must
# be lower-case themselves. The last "odd" keyword was a mis-decoded
# (mojibake) form of '中文' that could never match; it is restored here.
code_keywords = ('def ', 'return', 'function', 'python', '```')
odd_keywords = ('stravinsky', 'department', 'openstring', '中文')

for i, (name, prompt_dict) in enumerate(test_samples, 1):
    print(f"\n[ν…ŒμŠ€νŠΈ {i}] {name}")
    print("-" * 60)

    # Apply the chat template; the result is a string ending with the
    # generation prompt for the assistant turn.
    prompt_with_template = tokenizer.apply_chat_template(
        prompt_dict,
        add_generation_prompt=True,
        tokenize=False,
    )
    print(f"ν…œν”Œλ¦Ώ 적용 ν›„ μ‹œμž‘: {repr(prompt_with_template[:100])}...")

    # Tokenize. The templated text already carries its special tokens, so the
    # tokenizer must not add another set. Tensors are moved to the device the
    # model actually lives on (it was placed with device_map="auto").
    # NOTE(review): with the tokenizer's default truncation side, inputs
    # longer than max_length would lose the trailing generation prompt --
    # confirm prompts stay below 1024 tokens.
    inputs = tokenizer(
        prompt_with_template,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
        add_special_tokens=False,
    ).to(model.device)
    input_length = inputs['input_ids'].shape[1]
    print(f"μž…λ ₯ 토큰 수: {input_length}")

    # Generate. Fall back to the EOS id when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    generated = outputs[0][input_length:]
    response = tokenizer.decode(generated, skip_special_tokens=True)
    print(f"\nμƒμ„±λœ 응닡:")
    print(">" * 40)
    print(response[:500])  # print only the first 500 characters
    print("<" * 40)

    # Rough quality check: code-related keywords take precedence over the
    # known-garbage keywords; anything else is reported as unclear.
    response_lower = response.lower()
    if any(keyword in response_lower for keyword in code_keywords):
        print("βœ… μ½”λ“œ κ΄€λ ¨ 응닡 생성됨")
    elif any(keyword in response_lower for keyword in odd_keywords):
        print("❌ μ΄μƒν•œ 응닡 생성됨")
    else:
        print("❓ 응닡 νƒ€μž… 뢈λͺ…ν™•")
# Closing banner followed by section 5 (conclusion): hints on how to read the
# results above. Emitted with one print call, one argument per output line.
print(
    "\n" + "=" * 80,
    "ν…ŒμŠ€νŠΈ μ™„λ£Œ",
    "=" * 80,
    # 5. Conclusion
    "\nκ²°λ‘ :",
    "-" * 60,
    "μœ„ ν…ŒμŠ€νŠΈμ—μ„œ 정상적인 μ½”λ“œκ°€ μƒμ„±λ˜λ©΄:",
    " β†’ λ”•μ…”λ„ˆλ¦¬ 리슀트 ν˜•μ‹μ΄ μ˜¬λ°”λ₯΄κ²Œ μž‘λ™ν•¨",
    " β†’ complete_pipeline.py μˆ˜μ •μ΄ μ œλŒ€λ‘œ μ μš©λ˜μ§€ μ•Šμ€ 것이 문제",
    "\nμ΄μƒν•œ 응닡이 계속 μƒμ„±λ˜λ©΄:",
    " β†’ λ‹€λ₯Έ 근본적인 λ¬Έμ œκ°€ μžˆμ„ 수 있음",
    sep="\n",
)