#!/bin/bash

# Restrict execution to GPU 6
export CUDA_VISIBLE_DEVICES=6

export HUMANEVAL_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/HumanEvalPlus.jsonl
export MBPP_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/MbppPlus.jsonl

# Positional arguments with defaults: dataset, model, greedy flag, temperature, top-p, n-samples
DATASET=${1:-humaneval}
MODEL=${2:-"andrewzh/Absolute_Zero_Reasoner-Coder-3b"}
GREEDY=${3:-1}
TEMP=${4:-0.8}
TOP_P=${5:-0.9}
N_SAMPLES=${6:-1}

# If greedy mode, force n_samples to 1
if [ "$GREEDY" -eq 1 ]; then
    N_SAMPLES=1
fi
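
# Example invocations (the script name and my-org/my-model below are placeholders):
#   ./eval.sh                          -> humaneval, default model, greedy decoding
#   ./eval.sh mbpp                     -> mbpp, default model, greedy decoding
#   ./eval.sh humaneval my-org/my-model 0 0.8 0.95 16
#                                      -> sampling at temperature 0.8, top-p 0.95, 16 samples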

echo "Dataset: $DATASET"
echo "Model: $MODEL"
echo "Greedy: $GREEDY (1=yes, 0=no)"
echo "Temperature: $TEMP"
echo "Top-P: $TOP_P"
echo "Number of samples: $N_SAMPLES"

# Extract model identifier for output file
MODEL_BASE=$(basename "$MODEL")
echo "Model base: $MODEL_BASE"

# Generate samples with evalplus.codegen (greedy vs. sampling)
if [ "$GREEDY" -eq 1 ]; then
    evalplus.codegen --model "$MODEL" \
                    --dataset "$DATASET" \
                    --backend vllm \
                    --trust_remote_code \
                    --greedy
    TEMP_VAL="0.0"
else
    evalplus.codegen --model "$MODEL" \
                    --dataset "$DATASET" \
                    --backend vllm \
                    --temperature "$TEMP" \
                    --top-p "$TOP_P" \
                    --trust_remote_code \
                    --n-samples "$N_SAMPLES"
    TEMP_VAL="$TEMP"
fi
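# TEMP_VAL mirrors the temperature expected in the generated samples filename
# (0.0 when --greedy is used); it is used to locate that file below.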

# Locate the generated samples file (named after the model base and temperature)
echo "Waiting for output file to be generated..."
sleep 2  # give the file a moment to be fully written

OUTPUT_FILE=$(find "evalplus_results/${DATASET}" -name "*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl" ! -name "*.raw.jsonl" -type f | head -n 1)
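
# Guard (a minimal safety check): abort if no samples file matched the pattern above,
# rather than passing an empty path to evalplus.evaluate.
if [ -z "$OUTPUT_FILE" ]; then
    echo "Error: no samples file found under evalplus_results/${DATASET} for ${MODEL_BASE} (temp ${TEMP_VAL})." >&2
    exit 1
fi
echo "Found samples file: $OUTPUT_FILE"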

# Run evaluation with found file
evalplus.evaluate --dataset "$DATASET" \
    --samples "$OUTPUT_FILE" \
    --min-time-limit 10.0 \
    --gt-time-limit-factor 8.0

echo "Evaluation complete. Results saved to evalplus_results/${DATASET}/${MODEL_BASE}_eval_results.json"