GenAI-Arena / arena_elo /update_elo_rating.sh
DongfuJiang's picture
Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena
46d61db
raw
history blame
5.09 kB
# Resolve the vote-log directory: keep a caller-supplied LOGDIR, otherwise
# (unset or empty) announce and export the default location.
[ -n "$LOGDIR" ] || {
  echo "LOGDIR is not set. Using default '../GenAI-Arena-hf-logs/vote_log'"
  LOGDIR="../GenAI-Arena-hf-logs/vote_log"
  export LOGDIR
}

# Enable errexit for the rest of the script.
set -e

# Refuse to continue when the log directory is missing.
test -d "$LOGDIR" || {
  echo "LOGDIR does not exist. Please check the path."
  exit 1
}

# Root directory for everything written below.
mkdir -p results
# Clean the battle data for each task and archive the cleaned JSON files.
#
# After every clean_battle_data run the script reads cut_off_date.txt from the
# working directory (presumably written by that module -- confirm) into a
# per-task variable and removes the file so the next run cannot pick up a
# stale value.
#
# The read / remove / report steps are separate statements on purpose: when
# chained with `&&`, a failing `cat` is not caught by `set -e` (enabled earlier
# in this script) and the script would carry on with an empty cutoff date.
# The `${var:?}` checks abort if the file existed but was empty.
python -m elo_rating.clean_battle_data --task_name "image_editing"
edition_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${edition_battle_cutoff_date:?cut_off_date.txt was empty for image_editing}"
echo "Image editing battle data last updated on $edition_battle_cutoff_date"

python -m elo_rating.clean_battle_data --task_name "t2i_generation"
generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${generation_battle_cutoff_date:?cut_off_date.txt was empty for t2i_generation}"
echo "T2I image generation battle data last updated on $generation_battle_cutoff_date"

python -m elo_rating.clean_battle_data --task_name "video_generation"
video_generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${video_generation_battle_cutoff_date:?cut_off_date.txt was empty for video_generation}"
echo "Video generation battle data last updated on $video_generation_battle_cutoff_date"

# One archive directory per cutoff date, plus ./results/latest, which the
# copies below write into and therefore must exist before the first cp
# (otherwise a fresh checkout aborts here).
mkdir -p "./results/${edition_battle_cutoff_date}"
mkdir -p "./results/${generation_battle_cutoff_date}"
mkdir -p "./results/${video_generation_battle_cutoff_date}"
mkdir -p ./results/latest

# Copy each cleaned file to ./results/latest under a date-free name ...
cp "clean_battle_image_editing_${edition_battle_cutoff_date}.json" ./results/latest/clean_battle_image_editing.json
cp "clean_battle_t2i_generation_${generation_battle_cutoff_date}.json" ./results/latest/clean_battle_t2i_generation.json
cp "clean_battle_video_generation_${video_generation_battle_cutoff_date}.json" ./results/latest/clean_battle_video_generation.json

# ... then move the original into its dated archive directory.
mv "clean_battle_image_editing_${edition_battle_cutoff_date}.json" "./results/${edition_battle_cutoff_date}/clean_battle_image_editing.json"
mv "clean_battle_t2i_generation_${generation_battle_cutoff_date}.json" "./results/${generation_battle_cutoff_date}/clean_battle_t2i_generation.json"
mv "clean_battle_video_generation_${video_generation_battle_cutoff_date}.json" "./results/${video_generation_battle_cutoff_date}/clean_battle_video_generation.json"
# Run the Elo analysis for each task on its archived clean battle file.
#
# After every elo_analysis run the script re-reads cut_off_date.txt
# (presumably rewritten by that module -- confirm) and overwrites the task's
# cutoff-date variable with the new value.
#
# The read / remove / report steps are separate statements so that a failing
# `cat` stops the script under `set -e` (enabled earlier in this script)
# instead of being masked inside an `&&` chain. The `${var:?}` checks abort on
# an empty value. Because the re-read date may differ from the one used to
# create the archive directory earlier, the target directory is (re)created
# before each mv.
echo "Calculating Elo rating for image editing task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/${edition_battle_cutoff_date}/clean_battle_image_editing.json"
edition_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${edition_battle_cutoff_date:?cut_off_date.txt was empty for image editing Elo analysis}"
echo "Image editing battle data (after filtering models with < 50 battles) last updated on $edition_battle_cutoff_date"
mkdir -p "./results/${edition_battle_cutoff_date}"
mv "./elo_results_${edition_battle_cutoff_date}.pkl" "./results/${edition_battle_cutoff_date}/elo_results_image_editing.pkl"

echo "Calculating Elo rating for t2i generation task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/${generation_battle_cutoff_date}/clean_battle_t2i_generation.json"
generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${generation_battle_cutoff_date:?cut_off_date.txt was empty for t2i generation Elo analysis}"
echo "T2I image generation battle data (after filtering models with < 50 battles) last updated on $generation_battle_cutoff_date"
mkdir -p "./results/${generation_battle_cutoff_date}"
mv "./elo_results_${generation_battle_cutoff_date}.pkl" "./results/${generation_battle_cutoff_date}/elo_results_t2i_generation.pkl"

echo "Calculating Elo rating for video generation task"
python3 -m elo_rating.elo_analysis --clean-battle-file "./results/${video_generation_battle_cutoff_date}/clean_battle_video_generation.json"
video_generation_battle_cutoff_date=$(cat cut_off_date.txt)
rm cut_off_date.txt
: "${video_generation_battle_cutoff_date:?cut_off_date.txt was empty for video generation Elo analysis}"
echo "Video generation battle data (after filtering models with < 50 battles) last updated on $video_generation_battle_cutoff_date"
mkdir -p "./results/${video_generation_battle_cutoff_date}"
mv "./elo_results_${video_generation_battle_cutoff_date}.pkl" "./results/${video_generation_battle_cutoff_date}/elo_results_video_generation.pkl"
# Generate the leaderboard CSV for each task from its Elo results pickle.
# All three leaderboards are generated before anything is copied to
# ./results/latest. Paths are quoted so a cutoff date containing whitespace
# or glob characters cannot split or expand a path.
python -m elo_rating.generate_leaderboard \
  --elo_rating_pkl "./results/${edition_battle_cutoff_date}/elo_results_image_editing.pkl" \
  --output_csv "./results/${edition_battle_cutoff_date}/image_editing_leaderboard.csv"

python -m elo_rating.generate_leaderboard \
  --elo_rating_pkl "./results/${generation_battle_cutoff_date}/elo_results_t2i_generation.pkl" \
  --output_csv "./results/${generation_battle_cutoff_date}/t2i_generation_leaderboard.csv"

python -m elo_rating.generate_leaderboard \
  --elo_rating_pkl "./results/${video_generation_battle_cutoff_date}/elo_results_video_generation.pkl" \
  --output_csv "./results/${video_generation_battle_cutoff_date}/video_generation_leaderboard.csv"

# Copy the dated leaderboards and Elo pickles to ./results/latest.
mkdir -p ./results/latest

cp "./results/${edition_battle_cutoff_date}/image_editing_leaderboard.csv" ./results/latest/image_editing_leaderboard.csv
cp "./results/${generation_battle_cutoff_date}/t2i_generation_leaderboard.csv" ./results/latest/t2i_generation_leaderboard.csv
cp "./results/${video_generation_battle_cutoff_date}/video_generation_leaderboard.csv" ./results/latest/video_generation_leaderboard.csv

cp "./results/${edition_battle_cutoff_date}/elo_results_image_editing.pkl" ./results/latest/elo_results_image_editing.pkl
cp "./results/${generation_battle_cutoff_date}/elo_results_t2i_generation.pkl" ./results/latest/elo_results_t2i_generation.pkl
cp "./results/${video_generation_battle_cutoff_date}/elo_results_video_generation.pkl" ./results/latest/elo_results_video_generation.pkl