#!/usr/bin/env python3
"""
VeRL λͺ¨λΈ 생성 ν…ŒμŠ€νŠΈ
데이터λ₯Ό λ”•μ…”λ„ˆλ¦¬ 리슀트둜 λ³€ν™˜ν•˜μ—¬ 정상적인 응닡이 λ‚˜μ˜€λŠ”μ§€ 확인
"""
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

print("=" * 80)
print("VeRL λͺ¨λΈ 생성 ν…ŒμŠ€νŠΈ")
print("=" * 80)

# GPU setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# 1. λͺ¨λΈκ³Ό ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
print("\nλͺ¨λΈ λ‘œλ”© 쀑...")
model_name = "Qwen/Qwen2.5-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
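# Defensive fallback: some tokenizers ship without a pad token, which would
# make the pad_token_id argument passed to generate() below None. (Assumption:
# Qwen2.5's tokenizer normally defines one, so this is usually a no-op.)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id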
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
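# device_map="auto" lets accelerate place the 7B weights across the available
# GPU(s), with CPU offload if needed, so no explicit model.to(device) call is
# required here.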
print(f"λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ: {model_name}")

# 2. Load the data
data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_unified_20250822_151912/mbpp/Mbpp_2/round_1/azr_training_data/induction.parquet"
df = pd.read_parquet(data_path)
print(f"\n데이터 λ‘œλ“œ: {len(df)} μƒ˜ν”Œ")

# 3. Samples to test
test_samples = []

# Sample 1: the first sample from the real data (only the first 1,000 characters are used)
prompt_str = df.iloc[0]['prompt']
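# The 'prompt' column may hold either a raw string or an already-structured
# list of role/content dicts; handle both cases.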
if isinstance(prompt_str, str):
    # Truncate if the prompt is too long
    truncated_prompt = prompt_str[:1000] + "\n\nAssistant:"
    prompt_dict = [{"role": "user", "content": truncated_prompt}]
else:
    prompt_dict = prompt_str
test_samples.append(("μ‹€μ œ 데이터 μƒ˜ν”Œ (truncated)", prompt_dict))

# Sample 2: a simple coding problem
simple_prompt = [{
    "role": "user",
    "content": "Write a Python function to calculate the factorial of a number."
}]
test_samples.append(("κ°„λ‹¨ν•œ μ½”λ”© 문제", simple_prompt))

# Sample 3: an AZR-style prompt
azr_style_prompt = [{
    "role": "user",
    "content": """Write a function that takes a list of numbers and returns the sum of all even numbers.

def sum_even_numbers(numbers):"""
}]
test_samples.append(("AZR μŠ€νƒ€μΌ", azr_style_prompt))

# 4. Run a generation test on each sample
print("\n" + "=" * 80)
print("생성 ν…ŒμŠ€νŠΈ μ‹œμž‘")
print("=" * 80)

for i, (name, prompt_dict) in enumerate(test_samples, 1):
    print(f"\n[ν…ŒμŠ€νŠΈ {i}] {name}")
    print("-" * 60)
    
    # Apply the chat template
    prompt_with_template = tokenizer.apply_chat_template(
        prompt_dict,
        add_generation_prompt=True,
        tokenize=False
    )
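    # Note: Qwen2.5 tokenizers appear to ship a ChatML-style chat template,
    # so the rendered prompt should look roughly like:
    #   <|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n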
    
    print(f"ν…œν”Œλ¦Ώ 적용 ν›„ μ‹œμž‘: {repr(prompt_with_template[:100])}...")
    
    # Tokenize
    inputs = tokenizer(
        prompt_with_template,
        return_tensors="pt",
        truncation=True,
        max_length=1024
    ).to(device)
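    # With device_map="auto", moving the inputs to "cuda" (i.e. cuda:0, where
    # the first shard lives) is sufficient; accelerate's hooks route
    # activations between devices during generation.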
    
    print(f"μž…λ ₯ 토큰 수: {inputs['input_ids'].shape[1]}")
    
    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
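    # Sampled decoding (temperature/top_p) gives varied outputs across runs;
    # for a stricter, reproducible check, do_sample=False (greedy decoding)
    # would be an alternative.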
    
    # Decode only the newly generated tokens (the slice drops the prompt)
    generated = outputs[0][inputs['input_ids'].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True)
    
    print(f"\nμƒμ„±λœ 응닡:")
    print(">" * 40)
    print(response[:500])  # print only the first 500 characters
    print("<" * 40)
    
    # Response quality check
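    # This is only a rough keyword heuristic; the "anomalous" markers below
    # appear to be tokens observed in earlier broken generations.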
    if any(keyword in response.lower() for keyword in ['def ', 'return', 'function', 'python', '```']):
        print("βœ… μ½”λ“œ κ΄€λ ¨ 응닡 생성됨")
    elif any(keyword in response.lower() for keyword in ['stravinsky', 'department', 'openstring', 'δΈ­ζ–‡']):
        print("❌ μ΄μƒν•œ 응닡 생성됨")
    else:
        print("❓ 응닡 νƒ€μž… 뢈λͺ…ν™•")

print("\n" + "=" * 80)
print("ν…ŒμŠ€νŠΈ μ™„λ£Œ")
print("=" * 80)

# 5. Conclusion
print("\nConclusion:")
print("-" * 60)
print("If the tests above generate normal code:")
print("  β†’ the list-of-dictionaries format works correctly")
print("  β†’ the problem is that the complete_pipeline.py fix was not actually applied")
print("\nIf anomalous responses keep appearing:")
print("  β†’ there may be a different underlying problem")