# Snapshot of revision 9c733bd (file size: 5,025 bytes).
import heapq
import math
import random
import re
import json
from typing import List, Tuple, Dict, Any, Optional
import itertools
from transformers import AutoTokenizer
import asyncio # New import added for async handling
from openai import AsyncOpenAI # Using AsyncOpenAI as client
import numpy as np
from openai import OpenAI
import openai
import json
import re
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path of the UTF-8 encoded JSON Lines file to read.

    Returns:
        A list holding one decoded JSON value per non-blank line, in the
        order the lines appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank lines (typically a trailing newline at the end of the
            # file) hold no record; handing them to json.loads would raise.
            if not line:
                continue
            records.append(json.loads(line))
    return records
def extract_answer_judge(solution_text: str) -> Optional[str]:
    r"""Return the contents of the last ``\boxed{...}`` in *solution_text*.

    Braces are matched by depth, so nested LaTeX groups such as
    ``\boxed{\frac{1}{2}}`` are returned whole instead of being cut at the
    first closing brace.

    Args:
        solution_text: Text that may contain one or more ``\boxed{...}``
            expressions.

    Returns:
        The whitespace-stripped contents of the last balanced ``\boxed{...}``
        expression, or ``None`` when there is none.
    """
    marker = "\\boxed{"
    text_len = len(solution_text)
    last_answer = None
    search_from = 0
    while True:
        start = solution_text.find(marker, search_from)
        if start == -1:
            break
        content_start = start + len(marker)
        # Walk forward until the brace opened by the marker is closed.
        depth = 1
        pos = content_start
        while pos < text_len and depth:
            char = solution_text[pos]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            pos += 1
        if depth:
            # This marker is never closed; a later marker nested inside it
            # may still be balanced, so resume right after the marker.
            search_from = content_start
            continue
        # pos is one past the closing brace of this expression.
        last_answer = solution_text[content_start:pos - 1].strip()
        search_from = pos
    return last_answer
def separate_steps(steps: List[str], mode: str = 'join') -> Any:
    """Convert between a list of steps and one blank-line-separated string.

    Args:
        steps: A list of strings when ``mode`` is ``'join'``; a single string
            when ``mode`` is ``'split'``.
        mode: ``'join'`` to glue the steps together with a blank line between
            them, ``'split'`` to break a string apart on blank lines.

    Returns:
        The joined string for ``'join'``, or the list of pieces for
        ``'split'``.

    Raises:
        ValueError: If ``mode`` is neither ``'join'`` nor ``'split'``.
        TypeError: If ``steps`` has the wrong type for the chosen mode.
    """
    separator = "\n\n"

    # Reject unknown modes first; the type of `steps` is irrelevant then.
    if mode not in ('join', 'split'):
        raise ValueError("Mode should be either 'join' or 'split'.")

    if mode == 'join':
        if isinstance(steps, list):
            return separator.join(steps)
        raise TypeError("For 'join' mode, 'steps' must be a list of strings.")

    if isinstance(steps, str):
        return steps.split(separator)
    raise TypeError("For 'split' mode, 'steps' must be a string.")
def _parse_judge_verdict(response: Optional[str]) -> Optional[str]:
    """Read the Yes/No verdict from the last three words of a judge reply."""
    if not response:
        # The API may return no text at all; treat that as "no verdict".
        return None
    last_words = ' '.join(response.strip().split()[-3:])
    # Match whole words only, so "Not", "None" or "Note" are not read as
    # "No". "Yes" is checked first, as before.
    if re.search(r'\bYes\b', last_words):
        return "Yes"
    if re.search(r'\bNo\b', last_words):
        return "No"
    return None


def evaluate_llm_as_judge(problem: str, steps: list, final_answer, output_type: str = 'bool') -> Dict[str, Any]:
    """Ask an LLM judge whether a solution reaches the reference answer.

    A prompt containing the problem, the reference answer and the solution
    steps is sent to the chat-completions endpoint at
    ``http://localhost:8014/v1`` (model ``DeepSeek-R1-Distill-Qwen-14B``,
    temperature 0.6, up to 8192 tokens). The verdict is read from the last
    three words of the reply.

    Args:
        problem: The problem statement, inserted into the prompt.
        steps: The solution steps; the list is interpolated into the prompt
            as-is.
        final_answer: The reference answer, inserted into the prompt.
        output_type: Accepted for backward compatibility; not used.

    Returns:
        A dict with the keys ``'question'``, ``'final_answer'``,
        ``'reasining_steps'``, ``'yes_or_no'`` (``"Yes"``, ``"No"`` or
        ``None`` when no verdict is found) and ``'response'`` (the raw reply).

    Side effects:
        Rebinds the module-level ``client`` on every call and prints the raw
        reply and the parsed verdict.
    """
    global client
    client = OpenAI(
        base_url="http://localhost:8014/v1",
        api_key="token-abc123"
    )
    model_name = "DeepSeek-R1-Distill-Qwen-14B"
    judge_prompt = f"""
I will show you a [Math Problem], the [Answer], and an [AI's Solution] generated by an AI assistant. Your task is to determine if the **final answer** in the [AI's Solution] matches the answer in the [Reference Solution].
--------------------------------------------------
[Math Problem]
{problem}
[Answer]
{final_answer}
[AI's Solution]
{steps}
--------------------------------------------------
Please evaluate whether the **Answer:** in the [AI's Solution] is correct, based solely on whether it matches the **final answer** in the [Answer].
Note that the [AI's Solution] does not need to replicate same reasoning steps of the [Answer]; it only needs to reach the same **final answer** to be considered correct.
Reply with only "Yes" or "No" in the end of your response.
"""
    messages = [{
        'role': 'user',
        'content': judge_prompt
    }]
    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=1,
        temperature=0.6,
        max_tokens=8192,
    )
    response = completion.choices[0].message.content
    print("*****Verification*****:", response)
    yes_or_no = _parse_judge_verdict(response)
    print('yes_or_no',yes_or_no)
    # NOTE: the key 'reasining_steps' keeps its original spelling; anything
    # reading the output file presumably looks it up under this exact name.
    merged_data = {
        'question': problem,
        'final_answer': final_answer,
        'reasining_steps': steps,
        'yes_or_no': yes_or_no,
        'response': response
    }
    return merged_data
# ---------------------------------------------------------------------------
# Driver script: read the processed records, have the LLM judge every record
# whose label list contains a 0, give the remaining records a fixed "No"
# stub, and write all results to a JSON Lines file.
# ---------------------------------------------------------------------------
new_file_path = '/data/zeju/O1_data/0311_test_training_new_processed.jsonl'
data_all = read_jsonl(new_file_path)
print(len(data_all))
output = []
zero = 0
# enumerate yields the real position of each record; list.index() rescans the
# list on every iteration and reports the first match for duplicate records.
for record_index, data in enumerate(data_all):
    print(record_index)
    problem = data['question']
    steps_ori = data['process']
    labels = data['label']
    final_answer = data['answer']
    steps = steps_ori.split('\n\n')
    steps[0] = problem + ' ' + steps[0]
    # Everything except the last blank-line-separated chunk is kept.
    steps_updated = steps[:-1]
    # NOTE(review): the source's indentation was lost; the `else` below is
    # assumed to pair with this outer `if` - confirm against the original.
    if zero in labels:
        merged_data = evaluate_llm_as_judge(problem=problem, steps=steps_updated, final_answer=final_answer, output_type='bool')
        if merged_data is not None:
            output.append(merged_data)
    else:
        # No 0 among the labels: record a fixed stub without calling the judge.
        merged_data = {
            'question': problem,
            'final_answer': final_answer,
            'reasining_steps': steps_updated,
            'yes_or_no': "No",
            'response': '<think>\n\n</think>-1'
        }
        output.append(merged_data)
output_file = '/data/zeju/O1_data/0312_test_80_washdata.jsonl'
# The handle gets its own name so `output_file` keeps holding the path and the
# final message reports the path rather than a file-object repr.
with open(output_file, 'w', encoding='utf-8') as out_handle:
    for entry in output:
        out_handle.write(json.dumps(entry, ensure_ascii=False) + '\n')
print(f"数据已成功写入 {output_file}")
# End of file.