#!/usr/bin/env bash
#
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Compilers used for any JIT/extension builds triggered during training.
export CC=/data/align-anything/miniconda3/envs/hantao_stable/bin/gcc
export CXX=/data/align-anything/miniconda3/envs/hantao_stable/bin/g++

# Location of the Triton kernel cache.
export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

# Weights & Biases credentials must never be committed to the repository.
# The key is taken from the caller's environment; when it is absent, wandb
# falls back to credentials stored by `wandb login`.
# NOTE(review): the key that used to be hardcoded here should be treated as
# compromised and rotated.
if [[ -n "${WANDB_API_KEY:-}" ]]; then
    export WANDB_API_KEY
else
    printf '%s\n' 'warning: WANDB_API_KEY is not set; relying on credentials stored by `wandb login`.' >&2
fi
export WANDB_MODE=online

# Base model that every run in the sweep starts from.
MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/chameleon-7b"

# Tokenized preference dataset directories, one per variant.
_mm_interp_root="/data/align-anything/hantao/data/mm_interp"
DATASET_PATH=()
for _variant in cocour_new_step10 cosi_new_step10 l0_new_step10 random; do
    DATASET_PATH+=("${_mm_interp_root}/AA_preference_${_variant}/tokenized")
done

# Data-file stems q0_10_preference ... q0_90_preference; the training loop
# appends ".pt" to each when passing it as --train_data_files.
DATASET_NAME=()
for ((_pct = 10; _pct <= 90; _pct += 10)); do
    DATASET_NAME+=("q0_${_pct}_preference")
done
unset _mm_interp_root _variant _pct

# Root directory under which every run writes its outputs.
OUTPUT_PATH="/data/align-anything/hantao/align-anything/outputs/mm_interp"
mkdir -p "$OUTPUT_PATH"

# Print the name of the directory that contains the final path component,
# e.g. ".../AA_preference_random/tokenized" -> "AA_preference_random".
# Arguments: $1 - dataset path (a single trailing slash is ignored)
# Outputs:   the parent directory's name on stdout
dataset_middle_name() {
    local path=${1%/}
    local parent=${path%/*}
    printf '%s\n' "${parent##*/}"
}

# Sweep: launch one DPO training run per (dataset directory, data file) pair.
for dataset_path in "${DATASET_PATH[@]}"; do
    for dataset_name in "${DATASET_NAME[@]}"; do
        TRAIN_DATASETS=$dataset_path

        # Dataset middle name. It must be captured with $(...): writing
        # `middle_name= echo ... | awk ...` only prints the value and leaves
        # the variable unassigned, which made every dataset directory share
        # the same output directory.
        middle_name=$(dataset_middle_name "$dataset_path")
        OUTPUT_DIR="$OUTPUT_PATH/$middle_name/$dataset_name"
        mkdir -p "$OUTPUT_DIR"
        echo "Training on $TRAIN_DATASETS, output to $OUTPUT_DIR"

        # Source the setup script (resolved relative to the current working
        # directory). MASTER_PORT is not defined in this file; presumably
        # setup.sh provides it -- TODO confirm.
        source ./setup.sh

        # Execute deepspeed command
        deepspeed \
            --master_port "${MASTER_PORT}" \
            --module align_anything.trainers.text_image_to_text_image.dpo \
            --model_name_or_path "${MODEL_NAME_OR_PATH}" \
            --train_datasets "${TRAIN_DATASETS}" \
            --output_dir "${OUTPUT_DIR}" \
            --per_device_train_batch_size 4 \
            --per_device_eval_batch_size 4 \
            --gradient_accumulation_steps 2 \
            --train_template Chameleon_preference \
            --train_split train \
            --train_data_files "${dataset_name}.pt" \
            --learning_rate 1e-6 \
            --epochs 3 \
            --lr_scheduler_type cosine \
            --save_interval 400

        # Post-process the run's output directory with the external cut.sh
        # script (its behavior is not visible from this file).
        bash /data/align-anything/hantao/align-anything/outputs/cut.sh "$OUTPUT_DIR"
    done
done