#!/bin/bash
# Pin the job to GPU 6
export CUDA_VISIBLE_DEVICES=6
export HUMANEVAL_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/HumanEvalPlus.jsonl
export MBPP_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/MbppPlus.jsonl
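# The override paths point evalplus at the local HumanEval+ / MBPP+ problem files bundled with
# this repo instead of letting it fetch them itself (behavior assumed from evalplus's
# HUMANEVAL_OVERRIDE_PATH / MBPP_OVERRIDE_PATH support; adjust the paths to your checkout).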
# Positional arguments with defaults when not specified
DATASET=${1:-humaneval}
MODEL=${2:-"andrewzh/Absolute_Zero_Reasoner-Coder-3b"}
GREEDY=${3:-1}
TEMP=${4:-0.8}
TOP_P=${5:-0.9}
N_SAMPLES=${6:-1}
# If greedy mode, force n_samples to 1
if [ "$GREEDY" -eq 1 ]; then
    N_SAMPLES=1
fi
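# Example invocations (the script name here is illustrative, not the actual filename):
#   bash run_evalplus.sh                                                    # greedy HumanEval+ run with the default model
#   bash run_evalplus.sh mbpp andrewzh/Absolute_Zero_Reasoner-Coder-3b 0 0.6 0.95 16   # sampling run on MBPP+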
echo "Dataset: $DATASET" | |
echo "Model: $MODEL" | |
echo "Greedy: $GREEDY (1=yes, 0=no)" | |
echo "Temperature: $TEMP" | |
echo "Top-P: $TOP_P" | |
echo "Number of samples: $N_SAMPLES" | |
# Extract model identifier for locating the output file
MODEL_BASE=$(basename "$MODEL")
echo "Model base: $MODEL_BASE"
# Run code generation; greedy decoding runs at temperature 0.0 with a single sample
if [ "$GREEDY" -eq 1 ]; then
    evalplus.codegen --model "$MODEL" \
        --dataset "$DATASET" \
        --backend vllm \
        --trust_remote_code \
        --greedy
    TEMP_VAL="0.0"
else
    evalplus.codegen --model "$MODEL" \
        --dataset "$DATASET" \
        --backend vllm \
        --temperature "$TEMP" \
        --top-p "$TOP_P" \
        --trust_remote_code \
        --n-samples "$N_SAMPLES"
    TEMP_VAL="$TEMP"
fi
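# TEMP_VAL mirrors the temperature embedded in the generated samples filename
# (0.0 for greedy runs); the find pattern below keys on it.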
# Locate the generated samples file (skip the .raw.jsonl intermediate output)
echo "Waiting for output file to be generated..."
sleep 2  # Give the backend a moment to finish writing the file
OUTPUT_FILE=$(find "evalplus_results/${DATASET}" -name "*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl" ! -name "*.raw.jsonl" -type f | head -n 1)
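# Fail fast if no samples file matched (the naming scheme assumed above may vary by evalplus version)
if [ -z "$OUTPUT_FILE" ]; then
    echo "Error: no generated samples file found under evalplus_results/${DATASET}" >&2
    exit 1
fi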
# Run evaluation on the found samples file, with relaxed per-test time limits
evalplus.evaluate --dataset "$DATASET" \
    --samples "$OUTPUT_FILE" \
    --min-time-limit 10.0 \
    --gt-time-limit-factor 8.0

echo "Evaluation complete. Results are written under evalplus_results/${DATASET}/ alongside ${OUTPUT_FILE}."