File size: 5,025 Bytes

9c733bd

import heapq
import math
import random
import re
import json
from typing import List, Tuple, Dict, Any, Optional
import itertools
from transformers import AutoTokenizer
import asyncio  # New import added for async handling
from openai import AsyncOpenAI   # Using AsyncOpenAI as client
import numpy as np
from openai import OpenAI
import openai
import json

import re
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path of a UTF-8 encoded file with one JSON document per line.

    Returns:
        A list holding the parsed value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank lines (e.g. a trailing empty line) carry no record and
            # would make json.loads raise, so skip them.
            if not line:
                continue
            data.append(json.loads(line))
    return data

def extract_answer_judge(solution_text: str):
    """Return the contents of the last ``\\boxed{...}`` in *solution_text*.

    Braces inside the box are matched by depth, so nested LaTeX such as
    ``\\boxed{\\frac{1}{2}}`` is returned whole instead of being cut at the
    first ``}``.

    Args:
        solution_text: Text that may contain one or more ``\\boxed{...}`` spans.

    Returns:
        The stripped contents of the last box, or ``None`` when the text is
        empty/``None`` or contains no box.
    """
    if not solution_text:
        return None

    marker = '\\boxed{'
    answer = None
    search_from = 0
    text_len = len(solution_text)

    while True:
        start = solution_text.find(marker, search_from)
        if start == -1:
            break
        body_start = start + len(marker)

        # Walk forward until the brace opened by the marker is closed.
        depth = 1
        pos = body_start
        while pos < text_len and depth:
            char = solution_text[pos]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
            pos += 1

        if depth == 0:
            answer = solution_text[body_start:pos - 1].strip()
            search_from = pos
            continue

        # Unbalanced braces: fall back to the first closing brace, if any,
        # so malformed input still yields a best-effort answer.
        close = solution_text.find('}', body_start)
        if close == -1:
            break
        answer = solution_text[body_start:close].strip()
        search_from = close + 1

    return answer

def separate_steps(steps: List[str], mode: str = 'join') -> Any:
    """Convert between a list of steps and a single blank-line-separated string.

    Args:
        steps: A list of strings when ``mode`` is ``'join'``; a single string
            when ``mode`` is ``'split'``.
        mode: ``'join'`` glues the list together with two newlines between
            items; ``'split'`` cuts the string at every pair of newlines.

    Returns:
        The joined string (``'join'``) or the list of pieces (``'split'``).

    Raises:
        ValueError: If ``mode`` is neither ``'join'`` nor ``'split'``.
        TypeError: If ``steps`` has the wrong type for the chosen mode.
    """
    step_separator = "\n\n"

    # Reject unknown modes first; the type of `steps` is irrelevant then.
    if mode not in ('join', 'split'):
        raise ValueError("Mode should be either 'join' or 'split'.")

    if mode == 'join':
        if isinstance(steps, list):
            return step_separator.join(steps)
        raise TypeError("For 'join' mode, 'steps' must be a list of strings.")

    if isinstance(steps, str):
        return steps.split(step_separator)
    raise TypeError("For 'split' mode, 'steps' must be a string.")
    
    
    
def _extract_verdict(response):
    """Read a "Yes"/"No" verdict from the last three words of *response*.

    Matching is on whole words, so tokens such as "Not", "None" or
    "Yesterday" are not mistaken for a verdict. "Yes" takes precedence when
    both words occur. Returns ``None`` when neither is found or when the
    response is empty/``None``.
    """
    last_words = ' '.join((response or '').split()[-3:])
    if re.search(r'\bYes\b', last_words):
        return "Yes"
    if re.search(r'\bNo\b', last_words):
        return "No"
    return None


def evaluate_llm_as_judge(problem: str, steps: list, final_answer, output_type: str = 'bool') -> Dict[str, Any]:
    """Ask a judge model whether a solution reaches the reference final answer.

    Sends a single chat-completion request to the OpenAI-compatible endpoint
    at ``http://localhost:8014/v1`` and reads the verdict from the last three
    whitespace-separated words of the reply.

    Args:
        problem: The problem statement inserted into the prompt.
        steps: The solution steps; inserted into the prompt via f-string
            formatting, so a list appears as its Python repr.
        final_answer: The reference answer inserted into the prompt.
        output_type: Unused; kept so existing callers keep working.

    Returns:
        A dict with the keys ``'question'``, ``'final_answer'``,
        ``'reasining_steps'`` (spelling kept because it is part of the written
        output), ``'yes_or_no'`` (``"Yes"``, ``"No"`` or ``None`` when no
        verdict was found) and ``'response'`` (the raw reply text).
    """
    # The client is published as a module-level name, as before.
    global client

    client = OpenAI(
        base_url="http://localhost:8014/v1",
        api_key="token-abc123",
    )

    model_name = "DeepSeek-R1-Distill-Qwen-14B"

    # NOTE(review): the prompt mentions a "[Reference Solution]" section that
    # is not present (the section is labelled "[Answer]"). The text is left
    # unchanged here because altering it would change the judge's behavior.
    judge_prompt = f"""
        I will show you a [Math Problem], the [Answer], and an [AI's Solution] generated by an AI assistant. Your task is to determine if the **final answer** in the [AI's Solution] matches the answer in the [Reference Solution].

        --------------------------------------------------

        [Math Problem]

        {problem}

        [Answer]

        {final_answer}

        [AI's Solution]

        {steps}

        --------------------------------------------------

        Please evaluate whether the **Answer:** in the [AI's Solution] is correct, based solely on whether it matches the **final answer** in the [Answer].

        Note that the [AI's Solution] does not need to replicate same reasoning steps of the [Answer]; it only needs to reach the same **final answer** to be considered correct.

        Reply with only "Yes" or "No" in the end of your response.

    """
    messages = [{
        'role': 'user',
        'content': judge_prompt
    }]
    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=1,
        temperature=0.6,
        max_tokens=8192,
    )
    # `content` may be None (e.g. an empty reply); _extract_verdict treats
    # that as "no verdict" instead of crashing on None.strip().
    response = completion.choices[0].message.content
    print("*****Verification*****:", response)

    yes_or_no = _extract_verdict(response)
    print('yes_or_no',yes_or_no)

    merged_data = {
        'question': problem,
        'final_answer': final_answer,
        'reasining_steps': steps,
        'yes_or_no': yes_or_no,
        'response': response
    }

    return merged_data
    
    
    
# Script body: read the processed records, obtain a judge verdict for each
# record whose label list contains 0, mark every other record as "No" with a
# placeholder response, and write all results to a JSON Lines file.
new_file_path = '/data/zeju/O1_data/0311_test_training_new_processed.jsonl'
data_all = read_jsonl(new_file_path)
print(len(data_all))
output = []
zero = 0
# enumerate gives the true position of each record; list.index() rescans the
# list on every iteration and reports the first match for duplicate records.
for record_index, data in enumerate(data_all):
    print(record_index)
    problem = data['question']
    steps_ori = data['process']
    labels = data['label']
    final_answer = data['answer']
    steps = steps_ori.split('\n\n')
    # Prefix the first step with the problem statement.
    steps[0] = problem + ' ' + steps[0]
    # Drop the last step before judging / recording.
    steps_updated = steps[:-1]

    if zero in labels:
        merged_data = evaluate_llm_as_judge(problem=problem, steps=steps_updated, final_answer=final_answer, output_type='bool')
        if merged_data is not None:
            output.append(merged_data)
    else:
        merged_data = {
            'question': problem,
            'final_answer': final_answer,
            'reasining_steps': steps_updated,
            'yes_or_no': "No",
            'response': '<think>\n\n</think>-1'
        }
        output.append(merged_data)


output_file = '/data/zeju/O1_data/0312_test_80_washdata.jsonl'
# Use a separate name for the handle so `output_file` keeps holding the path
# and the final message reports the path rather than a file-object repr.
with open(output_file, 'w', encoding='utf-8') as output_handle:
    for entry in output:
        output_handle.write(json.dumps(entry, ensure_ascii=False) + '\n')

# Message text means: "Data successfully written to <path>".
print(f"数据已成功写入 {output_file}")