hjkim00's picture
Restore all essential files - code, configs, and MBPP/HumanEval data
24c2665 verified
#!/bin/bash
# Environment for the evaluation run.
#
# Pin the run to GPU 6.
CUDA_VISIBLE_DEVICES=6
# Local copies of the HumanEval+ / MBPP+ datasets.
# NOTE(review): presumably read by evalplus in place of its default dataset
# download - confirm against the installed evalplus version.
HUMANEVAL_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/HumanEvalPlus.jsonl
MBPP_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/MbppPlus.jsonl
# Export all three so child processes started by this script inherit them.
export CUDA_VISIBLE_DEVICES HUMANEVAL_OVERRIDE_PATH MBPP_OVERRIDE_PATH
# Positional arguments (all optional; defaults in parentheses):
#   $1 DATASET    dataset name handed to the evalplus tools (humaneval)
#   $2 MODEL      model name or path (andrewzh/Absolute_Zero_Reasoner-Coder-3b)
#   $3 GREEDY     1 = greedy decoding, 0 = sampling (1)
#   $4 TEMP       sampling temperature, used only when GREEDY is not 1 (0.8)
#   $5 TOP_P      nucleus sampling top-p, used only when GREEDY is not 1 (0.9)
#   $6 N_SAMPLES  samples per problem, forced to 1 in greedy mode (1)
DATASET=${1:-humaneval}
MODEL=${2:-"andrewzh/Absolute_Zero_Reasoner-Coder-3b"}
GREEDY=${3:-1}
TEMP=${4:-0.8}
TOP_P=${5:-0.9}
N_SAMPLES=${6:-1}

# Succeeds only when $1 is an optionally signed decimal integer.
is_integer() {
  local digits=${1#[-+]}
  case "$digits" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# GREEDY is compared numerically with -eq. A value such as "true" would make
# that test error out and silently select sampling mode, so reject it here.
if ! is_integer "$GREEDY"; then
  echo "Error: GREEDY must be an integer (1=yes, 0=no), got '$GREEDY'" >&2
  exit 2
fi

# If greedy mode, force n_samples to 1
if [ "$GREEDY" -eq 1 ]; then
  N_SAMPLES=1
fi

echo "Dataset: $DATASET"
echo "Model: $MODEL"
echo "Greedy: $GREEDY (1=yes, 0=no)"
echo "Temperature: $TEMP"
echo "Top-P: $TOP_P"
echo "Number of samples: $N_SAMPLES"

# Last path component of the model name; used later to find the output file.
# "--" keeps a model name starting with "-" from being parsed as an option.
MODEL_BASE=$(basename -- "$MODEL")
echo "Model base: $MODEL_BASE"
# Generate completions with evalplus.codegen using the vllm backend.
# Reads:  MODEL, DATASET, GREEDY, TEMP, TOP_P, N_SAMPLES.
# Writes: TEMP_VAL - the temperature string used afterwards to locate the
#         samples file ("0.0" in greedy mode, otherwise $TEMP verbatim).
#
# The arguments are collected in an array so each value reaches the tool as
# exactly one word, even if it contains spaces or glob characters.
codegen_args=(--model "$MODEL" --dataset "$DATASET" --backend vllm)
if [ "$GREEDY" -eq 1 ]; then
  codegen_args+=(--trust_remote_code --greedy)
  TEMP_VAL="0.0"
else
  codegen_args+=(
    --temperature "$TEMP"
    --top-p "$TOP_P"
    --trust_remote_code
    --n-samples "$N_SAMPLES"
  )
  TEMP_VAL="$TEMP"
fi

# Abort if generation fails; otherwise the file lookup that follows could pick
# up a samples file left behind by an earlier run and evaluate stale output.
evalplus.codegen "${codegen_args[@]}" || {
  codegen_status=$?
  echo "Error: evalplus.codegen failed with exit status ${codegen_status}" >&2
  exit "$codegen_status"
}
# Locate the samples file produced by the generation step and evaluate it.
# Reads: DATASET, MODEL_BASE, TEMP_VAL.
echo "Waiting for output file to be generated..."
sleep 2 # Short grace period before looking for the file

# The full filename is not known here, so match on its suffix
# "<model base>_vllm_temp_<temperature>.jsonl" and skip the *.raw.jsonl files.
# If several files match, the first one reported by find is used.
OUTPUT_FILE=$(find "evalplus_results/${DATASET}" -name "*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl" ! -name "*.raw.jsonl" -type f | head -n 1)

# Without this check evalplus.evaluate would be invoked with an empty
# --samples argument when nothing was generated.
if [ -z "$OUTPUT_FILE" ]; then
  echo "Error: no samples file matching '*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl' found under evalplus_results/${DATASET}" >&2
  exit 1
fi
echo "Using samples file: $OUTPUT_FILE"

# Run evaluation with found file; report success only if it actually succeeded.
evalplus.evaluate --dataset "$DATASET" \
  --samples "$OUTPUT_FILE" \
  --min-time-limit 10.0 \
  --gt-time-limit-factor 8.0 || {
  evaluate_status=$?
  echo "Error: evalplus.evaluate failed with exit status ${evaluate_status}" >&2
  exit "$evaluate_status"
}

# NOTE(review): the results filename is chosen by evalplus and derived from the
# samples path, so point at the samples file instead of guessing the name.
echo "Evaluation complete. Results saved next to the samples file: $OUTPUT_FILE"