Upload folder using huggingface_hub
- calc_avg_comet.py +10 -0
- comet_api.py +60 -0
- ds_z3_config.json +30 -0
- infer.sh +79 -0
- infer_2.sh +79 -0
- infer_robust.sh +90 -0
- infer_robust_2.sh +83 -0
- merge_template.yaml +13 -0
- nohup.out +32 -0
- preprocess_robust.py +39 -0
- qwen2.5_full_sft.yaml +50 -0
- qwen2.5_lora_dpo.yaml +50 -0
- run_eval_cohesion.sh +41 -0
- run_eval_comet_api.sh +40 -0
- run_eval_robust.sh +47 -0
- run_merge_fix.sh +24 -0
- run_train.sh +25 -0
- test_api.py +28 -0
- vllm_1000.log +0 -0
- vllm_1000_2.log +0 -0
- vllm_1200.log +0 -0
- vllm_1400.log +0 -0
- vllm_1600.log +0 -0
- vllm_1800.log +158 -0
- vllm_600.log +0 -0
- vllm_600_2.log +0 -0
- vllm_800.log +0 -0
- vllm_800_2.log +0 -0
calc_avg_comet.py
ADDED
@@ -0,0 +1,10 @@
+import sys
+
+
+with open(sys.argv[1], 'r') as f:
+    lines = [line.strip() for line in f]
+
+scores = [float(line.split()[-1]) for line in lines]
+
+with open(sys.argv[1], 'a') as f:
+    f.write(f'Average score: {sum(scores) / len(scores) * 100:.2f}\n')
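
As committed, calc_avg_comet.py assumes every input line ends in a numeric token, which matches the "path\tscore: 0.1234" lines that comet_api.py prints. Note that the appended "Average score:" line also ends in a number, so re-running the script on the same file would fold the previous average into the new mean. A defensive variant is sketched below; it is an illustration, not part of this commit:

    # Sketch: same averaging as calc_avg_comet.py, but skips blank lines and
    # any "Average score:" line appended by a previous run.
    import sys

    with open(sys.argv[1], 'r') as f:
        lines = [line.strip() for line in f]

    scores = [float(line.split()[-1])
              for line in lines
              if line and not line.startswith('Average score:')]

    with open(sys.argv[1], 'a') as f:
        f.write(f'Average score: {sum(scores) / len(scores) * 100:.2f}\n')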
comet_api.py
ADDED
@@ -0,0 +1,60 @@
+import argparse
+import requests
+import time
+import os
+
+
+def get_comet_score(instances: list[dict], timeout=100, max_retries=10, comet_api: str=None):
+    if comet_api is not None:
+        url = f"http://{comet_api}/evaluate"
+    else:
+        url = f"http://{os.getenv('COMET_API')}/evaluate"
+    payload = {'instances': instances}
+
+    retries = 0
+    while retries < max_retries:
+        try:
+            response = requests.post(url, json=payload, timeout=timeout)
+
+            if response.status_code == 200:
+                # print(response.json()) # {'score': ...}
+                return response.json()['scores']
+            else:
+                print(f"Request failed with status code: {response.status_code}")
+        except requests.Timeout:
+            retries += 1
+            print(f"Request timed out. Retrying... ({retries}/{max_retries})")
+            time.sleep(5)
+        except requests.RequestException as e:
+            raise RuntimeError(f"Request failed due to: {e}")
+
+    raise RuntimeError("Max retries exceeded. Request failed.")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--source_file', '-s', type=str, required=True)
+    parser.add_argument('--target_file', '-t', type=str, required=True)
+    parser.add_argument('--reference_file', '-r', type=str, required=True)
+    parser.add_argument('--url', '-u', type=str, required=True)
+    args = parser.parse_args()
+
+    source_file = args.source_file
+    target_file = args.target_file
+    reference_file = args.reference_file
+    comet_api = args.url
+
+    with open(source_file, 'r') as f:
+        source_lines = f.readlines()
+    with open(target_file, 'r') as f:
+        target_lines = f.readlines()
+    with open(reference_file, 'r') as f:
+        reference_lines = f.readlines()
+
+    line_comet_scores = get_comet_score([{'src': i, 'mt': j, 'ref': k} for i, j, k in zip(source_lines, target_lines, reference_lines)], comet_api=comet_api)
+    avg_score = sum(line_comet_scores) / len(line_comet_scores) if line_comet_scores and len(line_comet_scores) > 0 else -1.0
+    print(f'{target_file}\tscore: {avg_score:.4f}')
+
+
+if __name__ == '__main__':
+    main()
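
One caveat in get_comet_score as committed: a non-200 response prints a message but never increments retries, so a persistently failing endpoint makes the while loop spin forever. A bounded variant is sketched below, assuming the same service contract (POST /evaluate with {'instances': [...]}, returning {'scores': [...]}); it is an illustration, not part of this commit:

    # Sketch: bounds non-200 responses as well as timeouts, so the loop
    # always terminates after max_retries attempts.
    import time
    import requests

    def get_comet_score_bounded(instances, url, timeout=100, max_retries=10):
        payload = {'instances': instances}
        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post(url, json=payload, timeout=timeout)
                if response.status_code == 200:
                    return response.json()['scores']
                print(f"Attempt {attempt}/{max_retries}: status {response.status_code}")
            except requests.Timeout:
                print(f"Attempt {attempt}/{max_retries}: timed out")
            except requests.RequestException as e:
                raise RuntimeError(f"Request failed due to: {e}")
            time.sleep(5)
        raise RuntimeError("Max retries exceeded. Request failed.")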
ds_z3_config.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 3,
+    "overlap_comm": false,
+    "contiguous_gradients": true,
+    "sub_group_size": 1e9,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 1e9,
+    "stage3_max_reuse_distance": 1e9,
+    "stage3_gather_16bit_weights_on_model_save": true
+  }
+}
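
A note on the "auto" placeholders: when LLaMA-Factory launches training against this file (it is referenced by path from qwen2.5_full_sft.yaml below), the Hugging Face Trainer/DeepSpeed integration fills them in from the training arguments (batch sizes, gradient accumulation, clipping, precision), which is what lets one ZeRO-3 config be shared across runs.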
infer.sh
ADDED
@@ -0,0 +1,79 @@
+
+# trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+device=$1
+deploy_flag=$2
+step=$3
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <device> <deploy_flag> <step>"
+    exit 1
+fi
+
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+model_path=$4
+
+language=$5
+src_lang=${language%-*}
+tgt_lang=${language#*-}
+
+# infer_address=10.249.42.177:8010
+# schedule_address=10.249.42.177:8011
+# infer_address=127.0.0.1:801$infer_device
+# schedule_address=127.0.0.1:801$schedule_device
+# address=10.249.42.182:801${device}
+address=127.0.0.1:801${device}
+
+# setting=window_20_1ep
+# setting=window_20_2ep_new
+work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+
+if [ "$deploy_flag" = "true" ]; then
+    if [ "${address%%:*}" = "127.0.0.1" ]; then
+        source ~/.zshrc
+        conda activate vllm
+        CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+        conda activate optima-vllm
+    else
+        ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching > /dev/null 2>&1 &"
+    fi
+fi
+
+echo "Waiting for LLM deployment in 20 seconds..."
+# sleep 20
+
+echo "Testing API of ${address}..."
+while true; do
+    python test_api.py $address
+    if [ $? -eq 0 ]; then
+        echo "API connected successfully!"
+        break
+    else
+        echo "API connection failed. Retrying in 5 seconds..."
+        sleep 5
+    fi
+done
+
+cur_path=`pwd`
+cd $work_dir
+
+# for i in 4 9 11; do
+for i in {0..11}; do
+    if [ ! -f "$cur_path/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+        echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+        python -u infer.py \
+            --src_file /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+            --output_path $cur_path/results/${src_lang}-${tgt_lang}_${step} \
+            --window_size 10 \
+            --infer_address $address \
+            --schedule_address $address \
+            --language ${src_lang}-${tgt_lang} \
+            --infer_temperature 0.7 \
+            --schedule_temperature 0.7 \
+            --translate_style base
+    fi
+done
+
+cd $cur_path
+
+# ssh wyt@${infer_address%%:*} "killall pt_main_thread"
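
infer.sh takes a GPU index, a deploy flag, a checkpoint step, the merged model path, and a language pair; run_train.sh below shows a concrete invocation (zsh infer.sh 0 true 600 <merged checkpoint> en-de). The pair is split with shell parameter expansion: ${language%-*} strips the shortest suffix matching "-*" (keeping the source language) and ${language#*-} strips the shortest prefix matching "*-" (keeping the target language), so en-zh yields en and zh.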
infer_2.sh
ADDED
@@ -0,0 +1,79 @@
+
+# trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+device=$1
+deploy_flag=$2
+step=$3
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <device> <deploy_flag> <step>"
+    exit 1
+fi
+
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+model_path=$4
+
+language=$5
+src_lang=${language%-*}
+tgt_lang=${language#*-}
+
+# infer_address=10.249.42.177:8010
+# schedule_address=10.249.42.177:8011
+# infer_address=127.0.0.1:801$infer_device
+# schedule_address=127.0.0.1:801$schedule_device
+# address=10.249.42.182:801${device}
+address=127.0.0.1:800${device}
+
+# setting=window_20_1ep
+# setting=window_20_2ep_new
+work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+
+if [ "$deploy_flag" = "true" ]; then
+    if [ "${address%%:*}" = "127.0.0.1" ]; then
+        source ~/.zshrc
+        conda activate vllm
+        CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.48 > vllm_${step}_2.log 2>&1 &
+        conda activate optima-vllm
+    else
+        ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching > /dev/null 2>&1 &"
+    fi
+fi
+
+echo "Waiting for LLM deployment in 20 seconds..."
+# sleep 20
+
+echo "Testing API of ${address}..."
+while true; do
+    python test_api.py $address
+    if [ $? -eq 0 ]; then
+        echo "API connected successfully!"
+        break
+    else
+        echo "API connection failed. Retrying in 5 seconds..."
+        sleep 5
+    fi
+done
+
+cur_path=`pwd`
+cd $work_dir
+
+# for i in 4 9 11; do
+for i in {0..11}; do
+    if [ ! -f "$cur_path/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+        echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+        python -u infer.py \
+            --src_file /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+            --output_path $cur_path/results/${src_lang}-${tgt_lang}_${step} \
+            --window_size 10 \
+            --infer_address $address \
+            --schedule_address $address \
+            --language ${src_lang}-${tgt_lang} \
+            --infer_temperature 0.7 \
+            --schedule_temperature 0.7 \
+            --translate_style base
+    fi
+done
+
+cd $cur_path
+
+# ssh wyt@${infer_address%%:*} "killall pt_main_thread"
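
infer_2.sh is identical to infer.sh except for three details that let a second server share a machine: it binds to port 800${device} instead of 801${device}, caps --gpu_memory_utilization at 0.48 instead of 0.9, and logs to vllm_${step}_2.log.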
infer_robust.sh
ADDED
@@ -0,0 +1,90 @@
+
+# trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+device=$1
+deploy_flag=$2
+step=$3
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <device> <deploy_flag> <step>"
+    exit 1
+fi
+
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+model_path=$4
+
+language=$5
+src_lang=${language%-*}
+tgt_lang=${language#*-}
+
+# infer_address=10.249.42.177:8010
+# schedule_address=10.249.42.177:8011
+# infer_address=127.0.0.1:801$infer_device
+# schedule_address=127.0.0.1:801$schedule_device
+# address=10.249.42.182:801${device}
+# address=127.0.0.1:801${device}
+# address=10.249.45.139:801${device}
+address=${device}
+
+level=$6
+
+# setting=window_20_1ep
+# setting=window_20_2ep_new
+work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+# data_dir=/data/wyt/codes/DocDPO/data/2017-01-ted-test
+data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_${level}
+output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_${level}
+
+if [ "$deploy_flag" = "true" ]; then
+    if [ "${address%%:*}" = "127.0.0.1" ]; then
+        source ~/.zshrc
+        conda activate vllm
+        CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+        conda activate optima-vllm
+    else
+        ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching > /dev/null 2>&1 &"
+    fi
+fi
+
+echo "Waiting for LLM deployment in 20 seconds..."
+# sleep 20
+
+echo "Testing API of ${address}..."
+while true; do
+    python test_api.py $address
+    if [ $? -eq 0 ]; then
+        echo "API connected successfully!"
+        break
+    else
+        echo "API connection failed. Retrying in 5 seconds..."
+        sleep 5
+    fi
+done
+
+cur_path=`pwd`
+cd $work_dir
+
+doc_ids=("${@:7}")
+echo "Document IDs to process: ${doc_ids[@]}"
+
+# for i in {0..5}; do
+# for i in {0..11}; do
+for i in "${doc_ids[@]}"; do
+    if [ ! -f "$output_dir/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+        echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+        python -u infer.py \
+            --src_file $data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+            --output_path $output_dir/${src_lang}-${tgt_lang}_${step} \
+            --window_size 10 \
+            --infer_address $address \
+            --schedule_address $address \
+            --language ${src_lang}-${tgt_lang} \
+            --infer_temperature 0.7 \
+            --schedule_temperature 0.7 \
+            --translate_style base
+    fi
+done
+
+cd $cur_path
+
+# ssh wyt@${infer_address%%:*} "killall pt_main_thread"
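
Note the argument-semantics shift in this variant: the first argument is used verbatim as the server address (address=${device}) rather than as a port suffix, argument 6 selects the perturbation level, and the document IDs come from argument 7 onward via doc_ids=("${@:7}"), so only the listed documents are (re)translated.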
infer_robust_2.sh
ADDED
@@ -0,0 +1,83 @@
+
+# trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+device=$1
+deploy_flag=$2
+step=$3
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <device> <deploy_flag> <step>"
+    exit 1
+fi
+
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+model_path=$4
+
+language=$5
+src_lang=${language%-*}
+tgt_lang=${language#*-}
+
+# infer_address=10.249.42.177:8010
+# schedule_address=10.249.42.177:8011
+# infer_address=127.0.0.1:801$infer_device
+# schedule_address=127.0.0.1:801$schedule_device
+# address=10.249.42.182:801${device}
+# address=127.0.0.1:801${device}
+address=10.249.45.139:801${device}
+
+# setting=window_20_1ep
+# setting=window_20_2ep_new
+work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+# data_dir=/data/wyt/codes/DocDPO/data/2017-01-ted-test
+data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_3
+output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_3
+
+if [ "$deploy_flag" = "true" ]; then
+    if [ "${address%%:*}" = "127.0.0.1" ]; then
+        source ~/.zshrc
+        conda activate vllm
+        CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+        conda activate optima-vllm
+    else
+        ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching > /dev/null 2>&1 &"
+    fi
+fi
+
+echo "Waiting for LLM deployment in 20 seconds..."
+# sleep 20
+
+echo "Testing API of ${address}..."
+while true; do
+    python test_api.py $address
+    if [ $? -eq 0 ]; then
+        echo "API connected successfully!"
+        break
+    else
+        echo "API connection failed. Retrying in 5 seconds..."
+        sleep 5
+    fi
+done
+
+cur_path=`pwd`
+cd $work_dir
+
+for i in {6..11}; do
+# for i in {0..11}; do
+    if [ ! -f "$output_dir/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+        echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+        python -u infer.py \
+            --src_file $data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+            --output_path $output_dir/${src_lang}-${tgt_lang}_${step} \
+            --window_size 10 \
+            --infer_address $address \
+            --schedule_address $address \
+            --language ${src_lang}-${tgt_lang} \
+            --infer_temperature 0.7 \
+            --schedule_temperature 0.7 \
+            --translate_style base
+    fi
+done
+
+cd $cur_path
+
+# ssh wyt@${infer_address%%:*} "killall pt_main_thread"
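
infer_robust_2.sh is a pinned variant of infer_robust.sh: the perturbation level is hardcoded to level_3, the server address is the remote 10.249.45.139:801${device}, and only documents 6 through 11 are processed.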
merge_template.yaml
ADDED
@@ -0,0 +1,13 @@
+### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
+
+### model
+model_name_or_path:
+adapter_name_or_path:
+template: qwen
+trust_remote_code: true
+
+### export
+export_dir:
+export_size: 5
+export_device: cpu # choices: [cpu, auto]
+export_legacy_format: false
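
The empty model_name_or_path, adapter_name_or_path, and export_dir fields are deliberate: run_merge_fix.sh below copies this template once per checkpoint and fills them in with sed before calling llamafactory-cli export.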
nohup.out
ADDED
@@ -0,0 +1,32 @@
+[Wed Sep 17 19:57:29 CST 2025] SFT Training Start
+[Thu Sep 18 00:05:57 CST 2025] SFT Training End
+[Thu Sep 18 00:05:57 CST 2025] DPO Training Start
+[Thu Sep 18 09:27:43 CST 2025] DPO Training End
+[Thu Sep 18 09:27:43 CST 2025] Merging Checkpoints
+[Thu Sep 18 09:33:12 CST 2025] Merging Checkpoints End
+[Thu Sep 18 09:33:12 CST 2025] Inference Start
+[Thu Sep 18 09:33:12 CST 2025] Inference End
+[Thu Sep 18 10:48:45 CST 2025] Inference Start
+[Thu Sep 18 10:48:45 CST 2025] Inference End
+[Thu Sep 18 12:38:00 CST 2025] Inference Start
+[Thu Sep 18 12:38:00 CST 2025] Inference End
+[Thu Sep 18 14:29:12 CST 2025] Merging Checkpoints
+[Thu Sep 18 14:31:09 CST 2025] Merging Checkpoints End
+[Thu Sep 18 14:31:09 CST 2025] Inference End
+[Thu Sep 18 14:34:00 CST 2025] Merging Checkpoints
+[Thu Sep 18 14:34:25 CST 2025] Merging Checkpoints
+[Thu Sep 18 14:36:15 CST 2025] Merging Checkpoints
+[Thu Sep 18 14:37:01 CST 2025] Inference Start
+[Thu Sep 18 14:37:01 CST 2025] Inference End
+[Thu Sep 18 14:37:29 CST 2025] Inference Start
+[Thu Sep 18 14:37:29 CST 2025] Inference End
+[Thu Sep 18 14:38:31 CST 2025] Inference Start
+[Thu Sep 18 14:38:31 CST 2025] Inference End
+[Thu Sep 18 14:41:21 CST 2025] Inference Start
+[Thu Sep 18 14:41:21 CST 2025] Inference End
+[Thu Sep 18 14:43:00 CST 2025] Inference Start
+[Thu Sep 18 14:43:00 CST 2025] Inference End
+[Thu Sep 18 17:23:43 CST 2025] Inference Start
+[Thu Sep 18 17:23:43 CST 2025] Inference End
+[Thu Sep 18 17:24:30 CST 2025] Inference Start
+[Thu Sep 18 17:24:30 CST 2025] Inference End
preprocess_robust.py
ADDED
@@ -0,0 +1,39 @@
+import argparse
+import os
+
+
+def main():
+    src_lang, tgt_lang = args.lang_pair.split("-")
+    tgt_file_list = [file for file in os.listdir(args.tgt_path) if file.endswith(f".{tgt_lang}")]
+    for tgt_file in tgt_file_list:
+        src_file = os.path.splitext(tgt_file)[0]
+        doc_id = src_file.split('.')[-1]
+        label_file = src_file.replace(f".{src_lang}.", ".id.")
+        with open(os.path.join(args.disturb_src_path, label_file), "r", encoding="utf-8") as f:
+            labels = [line.strip() for line in f]
+        with open(os.path.join(args.tgt_path, tgt_file), "r", encoding="utf-8") as f:
+            tgt_lines = [line.strip() for line in f]
+
+        assert len(labels) == len(tgt_lines), f"Length mismatch in {src_file} and {label_file}"
+        filterd_tgt_lines = [tgt for tgt, label in zip(tgt_lines, labels) if label.split('-')[0] == doc_id]
+
+        with open(os.path.join(args.original_src_path, src_file), "r", encoding="utf-8") as f:
+            original_src_lines = [line.strip() for line in f]
+        assert len(original_src_lines) == len(filterd_tgt_lines), f"Length mismatch in {src_file} and filtered {tgt_file}"
+
+        with open(os.path.join(args.output_path, tgt_file), "w", encoding="utf-8") as f:
+            f.write("\n".join(filterd_tgt_lines) + "\n")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--original_src_path", type=str)
+    parser.add_argument("--disturb_src_path", type=str)
+    parser.add_argument("--tgt_path", type=str)
+    parser.add_argument("--output_path", type=str)
+    parser.add_argument("--lang_pair", type=str)
+    args = parser.parse_args()
+
+    os.makedirs(args.output_path, exist_ok=True)
+
+    main()
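
The filename handling in main() is easier to follow with a concrete case. Under the IWSLT17 naming scheme used throughout these scripts (document id 0 here is illustrative):

    # Worked example of the path handling in preprocess_robust.py: splitext
    # drops the target-language extension, the last dot-separated field is
    # the document id, and the source-language tag is swapped for "id" to
    # locate the label file.
    import os

    tgt_file = "IWSLT17.TED.tst2017.en-zh.en.0.zh"
    src_file = os.path.splitext(tgt_file)[0]       # IWSLT17.TED.tst2017.en-zh.en.0
    doc_id = src_file.split('.')[-1]               # 0
    label_file = src_file.replace(".en.", ".id.")  # IWSLT17.TED.tst2017.en-zh.id.0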
qwen2.5_full_sft.yaml
ADDED
@@ -0,0 +1,50 @@
+### model
+model_name_or_path: /data/wyt/codes/checkpoints/Qwen2.5-7B-Instruct
+trust_remote_code: true
+
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+deepspeed: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+
+### dataset
+dataset_dir: /data/wyt/codes/DocDPO/sft/data_multilang/red_multilang_base_balanced_en_zhdefr_320
+dataset: sft_en-zh_tool,sft_en-zh_trans_base_sample,sft_en-de_tool,sft_en-de_trans_base_sample,sft_en-fr_tool,sft_en-fr_trans_base_sample
+template: qwen
+cutoff_len: 2560
+# max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+dataloader_num_workers: 4
+mask_history: true
+
+### output
+output_dir: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft
+logging_steps: 5
+# save_steps: 300
+save_strategy: epoch
+plot_loss: true
+overwrite_output_dir: true
+save_only_model: true
+report_to: tensorboard # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+### train
+per_device_train_batch_size: 4
+gradient_accumulation_steps: 2
+# learning_rate: 2.0e-5
+# learning_rate: 8.0e-6
+learning_rate: 1.0e-5
+num_train_epochs: 1.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+resume_from_checkpoint: null
+
+### eval
+# eval_dataset: alpaca_en_demo
+# val_size: 0.1
+# per_device_eval_batch_size: 1
+# eval_strategy: steps
+# eval_steps: 500
qwen2.5_lora_dpo.yaml
ADDED
@@ -0,0 +1,50 @@
+### model
+model_name_or_path: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft
+trust_remote_code: true
+
+### method
+stage: dpo
+do_train: true
+# finetuning_type: full
+finetuning_type: lora
+lora_rank: 8
+lora_target: all
+# deepspeed: /data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+
+### dataset
+dataset_dir: /data/wyt/codes/DocDPO/sft/data_multilang/red_multilang_base_balanced_en_zhdefr_320
+dataset: dpo_en-zh_tool,dpo_en-zh_trans_base_sample,dpo_en-de_tool,dpo_en-de_trans_base_sample,dpo_en-fr_tool,dpo_en-fr_trans_base_sample
+template: qwen
+cutoff_len: 2560
+# max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+dataloader_num_workers: 4
+
+### output
+output_dir: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/adapter
+logging_steps: 5
+save_steps: 200
+plot_loss: true
+overwrite_output_dir: true
+save_only_model: false
+report_to: tensorboard # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+### train
+per_device_train_batch_size: 2
+gradient_accumulation_steps: 4
+# learning_rate: 1.0e-4
+learning_rate: 5.0e-6
+num_train_epochs: 2.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+resume_from_checkpoint: null
+
+### eval
+# eval_dataset: alpaca_en_demo
+# val_size: 0.1
+# per_device_eval_batch_size: 1
+# eval_strategy: steps
+# eval_steps: 500
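
Together, these two configs define the pipeline that nohup.out above records: full-parameter SFT of Qwen2.5-7B-Instruct produces the sft checkpoint, DPO then trains a rank-8 LoRA adapter on top of it (saved every 200 steps to dpo/adapter), and run_merge_fix.sh merges each adapter back into full weights for serving with vLLM.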
run_eval_cohesion.sh
ADDED
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+
+dir_path=$1
+
+lang=$2
+src_lang=${lang%%-*}
+tgt_lang=${lang##*-}
+
+data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+
+for i in {0..11}; do
+    # source=$data_path/test.en.$i
+    target=$dir_path/IWSLT17.TED.tst2017.${lang}.${src_lang}.$i.${tgt_lang}
+    reference=$data_path/IWSLT17.TED.tst2017.${lang}.${tgt_lang}.$i
+    result=$dir_path/cohesion.txt
+
+    # echo $target
+
+    python -u /data/wyt/codes/DocDPO/evaluator/fine_grained_multi_demensional/eval_cohesion.py \
+        --model gpt-4.1 \
+        --input_file $target \
+        --reference_file $reference \
+        --target_language $tgt_lang \
+        --output_file $result
+done
+
+python /data/wyt/codes/DocDPO/evaluator/fine_grained_multi_demensional/calc_avg_cohesion.py $dir_path/cohesion.txt
+
+# cd $dir_path/aligned
+# file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+# echo $file_nums
+
+# cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+# cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+# sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+#     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+# cd $work_dir
+# python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
run_eval_comet_api.sh
ADDED
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+
+# lang=en-de
+# lang=en-zh
+lang=$2
+src_lang=${lang%%-*}
+tgt_lang=${lang##*-}
+
+dir_path=$1
+
+data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+# align_script_path=/data/wyt/codes/DocMTAgent/Bleualign
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc2/en-zh
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1/en-zh
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1_trans14b/en-zh
+echo $dir_path
+
+for i in {0..11}; do
+    source=$data_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i
+    target=$dir_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i.$tgt_lang
+    reference=$data_path/IWSLT17.TED.tst2017.$lang.$tgt_lang.$i
+    # python comet_api.py -s $source -t $target -r $reference -u 10.249.45.139:8088 >> $dir_path/comet_api.txt
+    python comet_api.py -s $source -t $target -r $reference -u 10.249.42.177:8088 >> $dir_path/comet_api.txt
+done
+
+python calc_avg_comet.py $dir_path/comet_api.txt
+
+# cd $dir_path/aligned
+# file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+# echo $file_nums
+
+# cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+# cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+# sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+#     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+# cd $work_dir
+# python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
run_eval_robust.sh
ADDED
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+
+# lang=en-de
+# lang=en-zh
+lang=$2
+src_lang=${lang%%-*}
+tgt_lang=${lang##*-}
+
+dir_path=$1
+level=$3
+
+data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+# align_script_path=/data/wyt/codes/DocMTAgent/Bleualign
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc2/en-zh
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1/en-zh
+# dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1_trans14b/en-zh
+echo $dir_path
+
+python preprocess_robust.py \
+    --original_src_path /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang} \
+    --disturb_src_path /data/wyt/codes/DocDPO/data/ted_robust/level_${level}/${src_lang}-${tgt_lang} \
+    --tgt_path results_robust/level_${level}/${src_lang}-${tgt_lang}_1200 \
+    --output_path results_robust/level_${level}/${src_lang}-${tgt_lang}_1200/tmp_robust \
+    --lang_pair $lang
+
+for i in {0..11}; do
+    source=$data_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i
+    target=results_robust/level_${level}/${src_lang}-${tgt_lang}_1200/tmp_robust/IWSLT17.TED.tst2017.$lang.$src_lang.$i.$tgt_lang
+    reference=$data_path/IWSLT17.TED.tst2017.$lang.$tgt_lang.$i
+    python comet_api.py -s $source -t $target -r $reference -u 127.0.0.1:8088 >> $dir_path/comet_api.txt
+done
+
+python calc_avg_comet.py $dir_path/comet_api.txt
+
+# cd $dir_path/aligned
+# file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+# echo $file_nums
+
+# cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+# cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+# sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+#     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+# cd $work_dir
+# python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
run_merge_fix.sh
ADDED
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+template_config="merge_template.yaml"
+output_config="merge.yaml"
+adapter_dir="dpo/adapter"
+merged_dir="dpo/merged"
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ted_react_sft_balanced_428/checkpoint-600
+# model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ted_react_sft_trans_base_sample_balanced_428/checkpoint-600
+model_path=$1
+
+for dir in $adapter_dir/checkpoint-*; do
+    ckpt=$(basename $dir)
+    echo $ckpt
+    num=${ckpt#checkpoint-}
+    echo $num
+    if { [ -z "$2" ] && [ -z "$3" ]; } || { [ "$num" -ge $2 ] && [ "$num" -le $3 ]; }; then
+        mkdir -p $merged_dir/$ckpt
+        cp $template_config $merged_dir/$ckpt/$output_config
+        sed -i "s|adapter_name_or_path:.*|adapter_name_or_path: $adapter_dir/$ckpt|" "$merged_dir/$ckpt/$output_config"
+        sed -i "s|model_name_or_path:.*|model_name_or_path: $model_path|" "$merged_dir/$ckpt/$output_config"
+        sed -i "s|export_dir:.*|export_dir: $merged_dir/$ckpt|" "$merged_dir/$ckpt/$output_config"
+        llamafactory-cli export $merged_dir/$ckpt/$output_config
+    fi
+done
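
The compound test inside the loop merges every checkpoint when no range is given (both $2 and $3 empty) and otherwise only those whose step number falls in [$2, $3]; the commented merge step in run_train.sh below shows the ranged form (run_merge_fix.sh <sft_path> 600 800).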
run_train.sh
ADDED
@@ -0,0 +1,25 @@
+
+
+source ~/.zshrc
+conda activate llama-factory
+
+# echo "[$(date)] SFT Training Start"
+# CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_full_sft.yaml > logs/train_sft.log 2>&1 && \
+# echo "[$(date)] SFT Training End"
+
+# echo "[$(date)] DPO Training Start"
+# CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_lora_dpo.yaml > logs/train_dpo.log 2>&1 && \
+# echo "[$(date)] DPO Training End"
+
+# echo "[$(date)] Merging Checkpoints"
+# CUDA_VISIBLE_DEVICES=0 zsh run_merge_fix.sh /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft 600 800 > logs/merge.log 2>&1 && \
+# echo "[$(date)] Merging Checkpoints End"
+
+# conda activate optima-vllm
+
+echo "[$(date)] Inference Start"
+CUDA_VISIBLE_DEVICES=0 zsh infer.sh 0 true 600 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-600 en-de > logs/infer_600_en-de.log 2>&1&
+CUDA_VISIBLE_DEVICES=1 zsh infer.sh 1 true 600 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-600 en-fr > logs/infer_600_en-fr.log 2>&1&
+CUDA_VISIBLE_DEVICES=2 zsh infer.sh 2 true 800 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-800 en-fr > logs/infer_800_en-fr.log 2>&1&
+CUDA_VISIBLE_DEVICES=3 zsh infer.sh 3 true 1000 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1000 en-fr > logs/infer_1000_en-fr.log 2>&1&
+echo "[$(date)] Inference End"
test_api.py
ADDED
@@ -0,0 +1,28 @@
+from openai import OpenAI
+import sys
+
+
+def main():
+
+    openai_api_key = "EMPTY"
+    api_base = f"http://{sys.argv[1]}/v1"
+
+    client = OpenAI(
+        api_key=openai_api_key,
+        base_url=api_base,
+    )
+
+    try:
+        completion = client.chat.completions.create(
+            model='qwen',
+            messages=[{'role': 'user', 'content': 'Hello!'}],
+            timeout=15
+        )
+
+        print(completion.choices[0].message.content)
+    except Exception:
+        exit(1)
+
+
+if __name__ == '__main__':
+    main()
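
test_api.py doubles as a readiness probe: it exits 0 once the vLLM server answers a chat completion and 1 on any failure, which is exactly what the "while true; python test_api.py $address" loops in the infer*.sh scripts poll until the model is up.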
vllm_1000.log
ADDED
The diff for this file is too large to render.
vllm_1000_2.log
ADDED
The diff for this file is too large to render.
vllm_1200.log
ADDED
The diff for this file is too large to render.
vllm_1400.log
ADDED
The diff for this file is too large to render.
vllm_1600.log
ADDED
The diff for this file is too large to render.
vllm_1800.log
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO 09-18 14:31:14 [__init__.py:241] Automatically detected platform cuda.
|
| 2 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:31:16 [api_server.py:1805] vLLM API server version 0.10.1.1
|
| 3 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:31:16 [utils.py:326] non-default args: {'model_tag': '/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', 'host': '0.0.0.0', 'port': 8011, 'model': '/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', 'served_model_name': ['qwen'], 'enable_prefix_caching': True}
|
| 4 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:31:22 [__init__.py:711] Resolved architecture: Qwen2ForCausalLM
|
| 5 |
+
[1;36m(APIServer pid=3508930)[0;0m `torch_dtype` is deprecated! Use `dtype` instead!
|
| 6 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:31:22 [__init__.py:1750] Using max model len 32768
|
| 7 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:31:23 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
INFO 09-18 14:31:27 [__init__.py:241] Automatically detected platform cuda.
|
| 9 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:29 [core.py:636] Waiting for init message from front-end.
|
| 10 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:29 [core.py:74] Initializing a V1 LLM engine (v0.10.1.1) with config: model='/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', speculative_config=None, tokenizer='/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=qwen, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, pooler_config=None, compilation_config={"level":3,"debug_dump_path":"","cache_dir":"","backend":"","custom_ops":[],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output","vllm.mamba_mixer2"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":1,"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"pass_config":{},"max_capture_size":512,"local_cache_dir":null}
|
| 11 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:30 [parallel_state.py:1134] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
|
| 12 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m WARNING 09-18 14:31:30 [topk_topp_sampler.py:61] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 13 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:30 [gpu_model_runner.py:1953] Starting to load model /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800...
|
| 14 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:30 [gpu_model_runner.py:1985] Loading model from scratch...
|
| 15 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m INFO 09-18 14:31:30 [cuda.py:328] Using Flash Attention backend on V1 engine.
|
| 16 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m
|
| 17 |
+
[1;36m(EngineCore_0 pid=3509752)[0;0m
|
| 18 |
+
[1;36m(EngineCore_0 pid=3509748)[0;0m
|
| 19 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m
|
| 20 |
+
[1;36m(EngineCore_0 pid=3509748)[0;0m
|
| 21 |
+
[1;36m(EngineCore_0 pid=3509748)[0;0m
|
| 22 |
+
[1;36m(EngineCore_0 pid=3509748)[0;0m
|
| 23 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:37 [default_loader.py:262] Loading weights took 6.39 seconds
|
| 24 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:37 [gpu_model_runner.py:2007] Model loading took 14.2488 GiB and 6.587665 seconds
|
| 25 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:44 [backends.py:548] Using cache directory: /data/wyt/.cache/vllm/torch_compile_cache/1fe949e292/rank_0_0/backbone for vLLM's torch.compile
|
| 26 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:44 [backends.py:559] Dynamo bytecode transform time: 6.39 s
|
| 27 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:49 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 4.697 s
|
| 28 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:52 [monitor.py:34] torch.compile takes 6.39 s in total
|
| 29 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:53 [gpu_worker.py:276] Available KV cache memory: 51.38 GiB
|
| 30 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:53 [kv_cache_utils.py:849] GPU KV cache size: 962,112 tokens
|
| 31 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:53 [kv_cache_utils.py:853] Maximum concurrency for 32,768 tokens per request: 29.36x
|
| 32 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m
|
| 33 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:56 [gpu_model_runner.py:2708] Graph capturing finished in 3 secs, took 1.56 GiB
|
| 34 |
+
[1;36m(EngineCore_0 pid=3509744)[0;0m INFO 09-18 14:31:56 [core.py:214] init engine (profile, create kv cache, warmup model) took 19.28 seconds
|
| 35 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [loggers.py:142] Engine 000: vllm cache_config_info with initialization after num_gpu_blocks is: 60132
|
| 36 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [api_server.py:1611] Supported_tasks: ['generate']
|
| 37 |
+
[1;36m(APIServer pid=3508927)[0;0m WARNING 09-18 14:31:57 [__init__.py:1625] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 38 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [serving_responses.py:120] Using default chat sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
|
| 39 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [serving_chat.py:134] Using default chat sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
|
| 40 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [serving_completion.py:77] Using default completion sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
|
| 41 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [api_server.py:1880] Starting vLLM API server 0 on http://0.0.0.0:8012
|
| 42 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:36] Available routes are:
|
| 43 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /openapi.json, Methods: HEAD, GET
|
| 44 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /docs, Methods: HEAD, GET
|
| 45 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 46 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /redoc, Methods: HEAD, GET
|
| 47 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /health, Methods: GET
|
| 48 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /load, Methods: GET
|
| 49 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /ping, Methods: POST
|
| 50 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /ping, Methods: GET
|
| 51 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /tokenize, Methods: POST
|
| 52 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /detokenize, Methods: POST
|
| 53 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/models, Methods: GET
|
| 54 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /version, Methods: GET
|
| 55 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses, Methods: POST
|
| 56 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses/{response_id}, Methods: GET
|
| 57 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses/{response_id}/cancel, Methods: POST
|
| 58 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/chat/completions, Methods: POST
|
| 59 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/completions, Methods: POST
|
| 60 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/embeddings, Methods: POST
|
| 61 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /pooling, Methods: POST
|
| 62 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /classify, Methods: POST
|
| 63 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /score, Methods: POST
|
| 64 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/score, Methods: POST
|
| 65 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/audio/transcriptions, Methods: POST
|
| 66 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/audio/translations, Methods: POST
|
| 67 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /rerank, Methods: POST
|
| 68 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/rerank, Methods: POST
|
| 69 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /v2/rerank, Methods: POST
|
| 70 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /scale_elastic_ep, Methods: POST
|
| 71 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /is_scaling_elastic_ep, Methods: POST
|
| 72 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO 09-18 14:31:57 [launcher.py:44] Route: /invocations, Met[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:00 [chat_utils.py:470] Detected the chat template content format to [1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:00 [chat_utils.py:470] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to ov[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:07 [loggers.py:123] Engine 000: Avg prompt throughput: 47.2 tokens/s[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:07 [loggers.py:123] Engine 000: Avg prompt throughput: 47.2 tokens/s, Avg gene[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 73 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 74 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 75 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 76 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 77 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 78 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 79 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 80 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 81 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 82 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 83 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 84 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 85 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:17 [loggers.py:123] Engine 000: Avg prompt throughput: 608.0 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:17 [loggers.py:123] Engine 000: Avg prompt throughput: 608.0 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 86 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 87 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 88 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 89 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 90 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:27 [loggers.py:123] Engine 000: Avg prompt throughput: 195.2 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:27 [loggers.py:123] Engine 000: Avg prompt throughput: 195.2 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 91 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 92 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 93 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 94 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:37 [loggers.py:123] Engine 000: Avg prompt throughput: 325.8 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:37 [loggers.py:123] Engine 000: Avg prompt throughput: 325.8 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 95 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 96 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 97 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 98 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 99 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 100 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 101 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 102 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 103 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 104 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 105 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:32:47 [loggers.py:123] Engine 000: Avg prompt throughput: 694.0 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:47 [loggers.py:123] Engine 000: Avg prompt throughput: 694.1 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 106 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 107 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 108 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 109 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 110 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 111 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 112 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 113 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:48072 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 114 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:41594 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 115 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:32:57 [loggers.py:123] Engine 000: Avg prompt throughput: 373.7 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 116 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 117 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:33:07 [loggers.py:123] Engine 000: Avg prompt throughput: 249.6 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:07 [loggers.py:123] Engine 000: Avg prompt throughput: 209.7 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:45372 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 118 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 119 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 120 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:45372 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 121 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:33:17 [loggers.py:123] Engine 000: Avg prompt throughput: 356.6 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:17 [loggers.py:123] Engine 000: Avg prompt throughput: 356.6 tokens/s, Avg generation throughput: 84.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 122 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 123 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 124 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 125 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 126 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 127 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 128 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 129 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 130 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 131 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 132 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:33:27 [loggers.py:123] Engine 000: Avg prompt throughput: 786.5 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:27 [loggers.py:123] Engine 000: Avg prompt throughput: 786.5 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 133 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 134 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 135 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:54502 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 136 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:33:37 [loggers.py:123] Engine 000: Avg prompt throughput: 253.4 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:37 [loggers.py:123] Engine 000: Avg prompt throughput: 253.4 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 137 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:54502 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 138 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 139 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 140 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 141 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 142 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:47 [loggers.py:123] Engine 000: Avg prompt throughput: 376.9 tokens/s, Avg generation throughput: 81.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:36442 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 143 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 144 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 145 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 146 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 147 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 148 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 149 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 150 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 151 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:33:57 [loggers.py:123] Engine 000: Avg prompt throughput: 848.9 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:33:57 [loggers.py:123] Engine 000: Avg prompt throughput: 849.0 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 152 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 153 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 154 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:35254 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 155 |
+
[1;36m(APIServer pid=3508933)[0;0m INFO 09-18 14:34:07 [loggers.py:123] Engine 000: Avg prompt throughput: 269.5 tokens/s
[1;36m(APIServer pid=3508930)[0;0m INFO 09-18 14:34:07 [loggers.py:123] Engine 000: Avg prompt throughput: 269.5 tokens/s
[1;36m(APIServer pid=3508933)[0;0m INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 156 |
+
[1;36m(APIServer pid=3508930)[0;0m INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
| 157 |
+
Avg generation throughput: 84.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%, Prefix cache hit rate: 57.6%
|
| 158 |
+
[1;36m(APIServer pid=3508927)[0;0m INFO: 127.0.0.1:58182 - "POST /v1/chat/completions HTTP/1.1" 200 OK
|
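Note: each "POST /v1/chat/completions HTTP/1.1" 200 OK entry above is one OpenAI-compatible chat completion request served by vLLM during inference. A minimal sketch of the kind of request that produces these entries, assuming a server on 127.0.0.1:8000 and a hypothetical served-model name "qwen2.5-sft" (the actual host, port, and model name are set by the infer scripts):

import requests

# Hypothetical endpoint and model name -- substitute whatever the infer
# scripts actually pass to the vLLM server.
url = "http://127.0.0.1:8000/v1/chat/completions"
payload = {
    "model": "qwen2.5-sft",
    "messages": [{"role": "user", "content": "Hello"}],
    "temperature": 0.0,
}

# A 200 response here is what appears as "200 OK" in the server log.
response = requests.post(url, json=payload, timeout=100)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])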
vllm_600.log
ADDED
The diff for this file is too large to render. See raw diff

vllm_600_2.log
ADDED
The diff for this file is too large to render. See raw diff

vllm_800.log
ADDED
The diff for this file is too large to render. See raw diff

vllm_800_2.log
ADDED
The diff for this file is too large to render. See raw diff