Ashton2000 committed on
Commit 981b783 · verified · 1 Parent(s): ebb1804

Upload folder using huggingface_hub

calc_avg_comet.py ADDED
@@ -0,0 +1,12 @@
+ import sys
+
+
+ with open(sys.argv[1], 'r') as f:
+     # skip blank lines and any previously appended average line, so re-running is safe
+     lines = [line.strip() for line in f
+              if line.strip() and not line.startswith('Average score')]
+
+ scores = [float(line.split()[-1]) for line in lines]
+
+ with open(sys.argv[1], 'a') as f:
+     f.write(f'Average score: {sum(scores) / len(scores) * 100:.2f}\n')
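For reference, run_eval_comet_api.sh below appends one `<target_file>\tscore: <value>` line per document to comet_api.txt, and this script averages the trailing numbers. A minimal end-to-end sketch (file names illustrative):

    # hypothetical two-document score file
    printf 'doc.0.zh\tscore: 0.8000\ndoc.1.zh\tscore: 0.9000\n' > comet_api.txt
    python calc_avg_comet.py comet_api.txt
    tail -n 1 comet_api.txt   # -> Average score: 85.00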
comet_api.py ADDED
@@ -0,0 +1,64 @@
+ import argparse
+ import requests
+ import time
+ import os
+
+
+ def get_comet_score(instances: list[dict], timeout=100, max_retries=10, comet_api: str = None):
+     if comet_api is not None:
+         url = f"http://{comet_api}/evaluate"
+     else:
+         url = f"http://{os.getenv('COMET_API')}/evaluate"
+     payload = {'instances': instances}
+
+     retries = 0
+     while retries < max_retries:
+         try:
+             response = requests.post(url, json=payload, timeout=timeout)
+
+             if response.status_code == 200:
+                 # print(response.json())  # {'scores': [...]}
+                 return response.json()['scores']
+             else:
+                 # count non-200 replies against the retry budget so a broken
+                 # server cannot keep this loop spinning forever
+                 retries += 1
+                 print(f"Request failed with status code: {response.status_code}")
+                 time.sleep(5)
+         except requests.Timeout:
+             retries += 1
+             print(f"Request timed out. Retrying... ({retries}/{max_retries})")
+             time.sleep(5)
+         except requests.RequestException as e:
+             raise RuntimeError(f"Request failed due to: {e}")
+
+     raise RuntimeError("Max retries exceeded. Request failed.")
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--source_file', '-s', type=str, required=True)
+     parser.add_argument('--target_file', '-t', type=str, required=True)
+     parser.add_argument('--reference_file', '-r', type=str, required=True)
+     parser.add_argument('--url', '-u', type=str, required=True)
+     args = parser.parse_args()
+
+     source_file = args.source_file
+     target_file = args.target_file
+     reference_file = args.reference_file
+     comet_api = args.url
+
+     with open(source_file, 'r') as f:
+         source_lines = f.readlines()
+     with open(target_file, 'r') as f:
+         target_lines = f.readlines()
+     with open(reference_file, 'r') as f:
+         reference_lines = f.readlines()
+
+     line_comet_scores = get_comet_score([{'src': i, 'mt': j, 'ref': k} for i, j, k in zip(source_lines, target_lines, reference_lines)], comet_api=comet_api)
+     avg_score = sum(line_comet_scores) / len(line_comet_scores) if line_comet_scores else -1.0
+     print(f'{target_file}\tscore: {avg_score:.4f}')
+
+
+ if __name__ == '__main__':
+     main()
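The serving side of `/evaluate` is not part of this commit; from what get_comet_score sends and reads, it is assumed to accept a JSON body with an `instances` list of `src`/`mt`/`ref` triples and to return `{"scores": [...]}` with one score per instance. A hedged curl sketch (host and port illustrative):

    curl -s http://127.0.0.1:8088/evaluate \
        -H 'Content-Type: application/json' \
        -d '{"instances": [{"src": "Hello.", "mt": "你好。", "ref": "你好。"}]}'
    # assumed reply shape: {"scores": [0.87]}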
ds_z3_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+     "train_batch_size": "auto",
+     "train_micro_batch_size_per_gpu": "auto",
+     "gradient_accumulation_steps": "auto",
+     "gradient_clipping": "auto",
+     "zero_allow_untested_optimizer": true,
+     "fp16": {
+         "enabled": "auto",
+         "loss_scale": 0,
+         "loss_scale_window": 1000,
+         "initial_scale_power": 16,
+         "hysteresis": 2,
+         "min_loss_scale": 1
+     },
+     "bf16": {
+         "enabled": "auto"
+     },
+     "zero_optimization": {
+         "stage": 3,
+         "overlap_comm": false,
+         "contiguous_gradients": true,
+         "sub_group_size": 1e9,
+         "reduce_bucket_size": "auto",
+         "stage3_prefetch_bucket_size": "auto",
+         "stage3_param_persistence_threshold": "auto",
+         "stage3_max_live_parameters": 1e9,
+         "stage3_max_reuse_distance": 1e9,
+         "stage3_gather_16bit_weights_on_model_save": true
+     }
+ }
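The `"auto"` values are placeholders that the training launcher fills in from the YAML configs below; that resolution is standard LLaMA-Factory/Hugging Face Trainer behavior, not something defined in this commit. A quick check that the file parses (note `1e9` is a valid JSON number):

    python -c "import json; print(json.load(open('ds_z3_config.json'))['zero_optimization']['stage'])"   # -> 3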
infer.sh ADDED
@@ -0,0 +1,79 @@
+ #!/bin/bash
+ # trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+ device=$1
+ deploy_flag=$2
+ step=$3
+
+ if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+     echo "Usage: $0 <device> <deploy_flag> <step> <model_path> <lang_pair>"
+     exit 1
+ fi
+
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+ model_path=$4
+
+ language=$5
+ src_lang=${language%-*}
+ tgt_lang=${language#*-}
+
+ # infer_address=10.249.42.177:8010
+ # schedule_address=10.249.42.177:8011
+ # infer_address=127.0.0.1:801$infer_device
+ # schedule_address=127.0.0.1:801$schedule_device
+ # address=10.249.42.182:801${device}
+ address=127.0.0.1:801${device}
+
+ # setting=window_20_1ep
+ # setting=window_20_2ep_new
+ work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+
+ if [ "$deploy_flag" = "true" ]; then
+     if [ "${address%%:*}" = "127.0.0.1" ]; then
+         source ~/.zshrc
+         conda activate vllm
+         CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+         conda activate optima-vllm
+     else
+         ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
+     fi
+ fi
+
+ echo "Waiting for LLM deployment in 20 seconds..."
+ # sleep 20
+
+ echo "Testing API of ${address}..."
+ while true; do
+     python test_api.py $address
+     if [ $? -eq 0 ]; then
+         echo "API connected successfully!"
+         break
+     else
+         echo "API connection failed. Retrying in 5 seconds..."
+         sleep 5
+     fi
+ done
+
+ cur_path=`pwd`
+ cd $work_dir
+
+ # for i in 4 9 11; do
+ for i in {0..11}; do
+     if [ ! -f "$cur_path/results/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+         echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+         python -u infer.py \
+             --src_file /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+             --output_path $cur_path/results/${src_lang}-${tgt_lang}_${step} \
+             --window_size 10 \
+             --infer_address $address \
+             --schedule_address $address \
+             --language ${src_lang}-${tgt_lang} \
+             --infer_temperature 0.7 \
+             --schedule_temperature 0.7 \
+             --translate_style base
+     fi
+ done
+
+ cd $cur_path
+
+ # ssh wyt@${infer_address%%:*} "killall pt_main_thread"
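run_train.sh below shows how this script is driven; a single illustrative call (the checkpoint path is the one used there, the language pair is interchangeable):

    # deploy a vLLM server on GPU 0 / port 8010, then translate the 12 test documents
    CUDA_VISIBLE_DEVICES=0 zsh infer.sh 0 true 600 \
        /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-600 en-zh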
infer_2.sh ADDED
@@ -0,0 +1,79 @@
+ #!/bin/bash
+ # trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+ device=$1
+ deploy_flag=$2
+ step=$3
+
+ if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+     echo "Usage: $0 <device> <deploy_flag> <step> <model_path> <lang_pair>"
+     exit 1
+ fi
+
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+ model_path=$4
+
+ language=$5
+ src_lang=${language%-*}
+ tgt_lang=${language#*-}
+
+ # infer_address=10.249.42.177:8010
+ # schedule_address=10.249.42.177:8011
+ # infer_address=127.0.0.1:801$infer_device
+ # schedule_address=127.0.0.1:801$schedule_device
+ # address=10.249.42.182:801${device}
+ address=127.0.0.1:800${device}
+
+ # setting=window_20_1ep
+ # setting=window_20_2ep_new
+ work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+
+ if [ "$deploy_flag" = "true" ]; then
+     if [ "${address%%:*}" = "127.0.0.1" ]; then
+         source ~/.zshrc
+         conda activate vllm
+         CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.48 > vllm_${step}_2.log 2>&1 &
+         conda activate optima-vllm
+     else
+         ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
+     fi
+ fi
+
+ echo "Waiting for LLM deployment in 20 seconds..."
+ # sleep 20
+
+ echo "Testing API of ${address}..."
+ while true; do
+     python test_api.py $address
+     if [ $? -eq 0 ]; then
+         echo "API connected successfully!"
+         break
+     else
+         echo "API connection failed. Retrying in 5 seconds..."
+         sleep 5
+     fi
+ done
+
+ cur_path=`pwd`
+ cd $work_dir
+
+ # for i in 4 9 11; do
+ for i in {0..11}; do
+     if [ ! -f "$cur_path/results/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+         echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+         python -u infer.py \
+             --src_file /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+             --output_path $cur_path/results/${src_lang}-${tgt_lang}_${step} \
+             --window_size 10 \
+             --infer_address $address \
+             --schedule_address $address \
+             --language ${src_lang}-${tgt_lang} \
+             --infer_temperature 0.7 \
+             --schedule_temperature 0.7 \
+             --translate_style base
+     fi
+ done
+
+ cd $cur_path
+
+ # ssh wyt@${infer_address%%:*} "killall pt_main_thread"
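This variant differs from infer.sh only in the port scheme (800<device> instead of 801<device>), `--gpu_memory_utilization 0.48`, and the `_2` log suffix, so its server can coexist with another process on the same card. Illustrative call (`$model_path` is a placeholder):

    # same interface as infer.sh; the server claims roughly half the GPU memory
    CUDA_VISIBLE_DEVICES=1 zsh infer_2.sh 1 true 600 $model_path en-zh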
infer_robust.sh ADDED
@@ -0,0 +1,90 @@
+ #!/bin/bash
+ # trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+ device=$1
+ deploy_flag=$2
+ step=$3
+
+ if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+     echo "Usage: $0 <address> <deploy_flag> <step> <model_path> <lang_pair> <level> [doc_id ...]"
+     exit 1
+ fi
+
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+ model_path=$4
+
+ language=$5
+ src_lang=${language%-*}
+ tgt_lang=${language#*-}
+
+ # infer_address=10.249.42.177:8010
+ # schedule_address=10.249.42.177:8011
+ # infer_address=127.0.0.1:801$infer_device
+ # schedule_address=127.0.0.1:801$schedule_device
+ # address=10.249.42.182:801${device}
+ # address=127.0.0.1:801${device}
+ # address=10.249.45.139:801${device}
+ address=${device}
+
+ level=$6
+
+ # setting=window_20_1ep
+ # setting=window_20_2ep_new
+ work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+ # data_dir=/data/wyt/codes/DocDPO/data/2017-01-ted-test
+ data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_${level}
+ output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_${level}
+
+ if [ "$deploy_flag" = "true" ]; then
+     if [ "${address%%:*}" = "127.0.0.1" ]; then
+         source ~/.zshrc
+         conda activate vllm
+         CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+         conda activate optima-vllm
+     else
+         ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
+     fi
+ fi
+
+ echo "Waiting for LLM deployment in 20 seconds..."
+ # sleep 20
+
+ echo "Testing API of ${address}..."
+ while true; do
+     python test_api.py $address
+     if [ $? -eq 0 ]; then
+         echo "API connected successfully!"
+         break
+     else
+         echo "API connection failed. Retrying in 5 seconds..."
+         sleep 5
+     fi
+ done
+
+ cur_path=`pwd`
+ cd $work_dir
+
+ doc_ids=("${@:7}")
+ echo "Document IDs to process: ${doc_ids[@]}"
+
+ # for i in {0..5}; do
+ # for i in {0..11}; do
+ for i in "${doc_ids[@]}"; do
+     if [ ! -f "$output_dir/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+         echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+         python -u infer.py \
+             --src_file $data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+             --output_path $output_dir/${src_lang}-${tgt_lang}_${step} \
+             --window_size 10 \
+             --infer_address $address \
+             --schedule_address $address \
+             --language ${src_lang}-${tgt_lang} \
+             --infer_temperature 0.7 \
+             --schedule_temperature 0.7 \
+             --translate_style base
+     fi
+ done
+
+ cd $cur_path
+
+ # ssh wyt@${infer_address%%:*} "killall pt_main_thread"
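Unlike infer.sh, here $1 is taken verbatim as the API address (host:port), $6 selects the perturbation level, and $7 onwards are document IDs. A hedged sketch that reuses an already-running server, so deploy_flag is false (address, step, and level illustrative; `$model_path` a placeholder):

    zsh infer_robust.sh 127.0.0.1:8010 false 1200 $model_path en-zh 2 0 1 2 3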
infer_robust_2.sh ADDED
@@ -0,0 +1,83 @@
+ #!/bin/bash
+ # trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT
+
+ device=$1
+ deploy_flag=$2
+ step=$3
+
+ if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+     echo "Usage: $0 <device> <deploy_flag> <step> <model_path> <lang_pair>"
+     exit 1
+ fi
+
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/lora/ted_react_trans_base_sample_sft_dpolora_balanced_474/merged_fix/checkpoint-${step}
+ model_path=$4
+
+ language=$5
+ src_lang=${language%-*}
+ tgt_lang=${language#*-}
+
+ # infer_address=10.249.42.177:8010
+ # schedule_address=10.249.42.177:8011
+ # infer_address=127.0.0.1:801$infer_device
+ # schedule_address=127.0.0.1:801$schedule_device
+ # address=10.249.42.182:801${device}
+ # address=127.0.0.1:801${device}
+ address=10.249.45.139:801${device}
+
+ # setting=window_20_1ep
+ # setting=window_20_2ep_new
+ work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
+ # data_dir=/data/wyt/codes/DocDPO/data/2017-01-ted-test
+ data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_3
+ output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_3
+
+ if [ "$deploy_flag" = "true" ]; then
+     if [ "${address%%:*}" = "127.0.0.1" ]; then
+         source ~/.zshrc
+         conda activate vllm
+         CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name "qwen" --enable-prefix-caching --gpu_memory_utilization 0.9 > vllm_${step}.log 2>&1 &
+         conda activate optima-vllm
+     else
+         ssh -n wyt@${address%%:*} "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
+     fi
+ fi
+
+ echo "Waiting for LLM deployment in 20 seconds..."
+ # sleep 20
+
+ echo "Testing API of ${address}..."
+ while true; do
+     python test_api.py $address
+     if [ $? -eq 0 ]; then
+         echo "API connected successfully!"
+         break
+     else
+         echo "API connection failed. Retrying in 5 seconds..."
+         sleep 5
+     fi
+ done
+
+ cur_path=`pwd`
+ cd $work_dir
+
+ for i in {6..11}; do
+ # for i in {0..11}; do
+     if [ ! -f "$output_dir/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}" ]; then
+         echo IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
+         python -u infer.py \
+             --src_file $data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i \
+             --output_path $output_dir/${src_lang}-${tgt_lang}_${step} \
+             --window_size 10 \
+             --infer_address $address \
+             --schedule_address $address \
+             --language ${src_lang}-${tgt_lang} \
+             --infer_temperature 0.7 \
+             --schedule_temperature 0.7 \
+             --translate_style base
+     fi
+ done
+
+ cd $cur_path
+
+ # ssh wyt@${infer_address%%:*} "killall pt_main_thread"
merge_template.yaml ADDED
@@ -0,0 +1,13 @@
+ ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
+
+ ### model
+ model_name_or_path:
+ adapter_name_or_path:
+ template: qwen
+ trust_remote_code: true
+
+ ### export
+ export_dir:
+ export_size: 5
+ export_device: cpu # choices: [cpu, auto]
+ export_legacy_format: false
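run_merge_fix.sh below fills the three empty keys with sed before exporting; one iteration of its loop is equivalent to this sketch (checkpoint and base-model paths illustrative):

    cp merge_template.yaml merge.yaml
    sed -i 's|model_name_or_path:.*|model_name_or_path: /path/to/sft|' merge.yaml
    sed -i 's|adapter_name_or_path:.*|adapter_name_or_path: dpo/adapter/checkpoint-600|' merge.yaml
    sed -i 's|export_dir:.*|export_dir: dpo/merged/checkpoint-600|' merge.yaml
    llamafactory-cli export merge.yaml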
nohup.out ADDED
@@ -0,0 +1,32 @@
+ [Wed Sep 17 19:57:29 CST 2025] SFT Training Start
+ [Thu Sep 18 00:05:57 CST 2025] SFT Training End
+ [Thu Sep 18 00:05:57 CST 2025] DPO Training Start
+ [Thu Sep 18 09:27:43 CST 2025] DPO Training End
+ [Thu Sep 18 09:27:43 CST 2025] Merging Checkpoints
+ [Thu Sep 18 09:33:12 CST 2025] Merging Checkpoints End
+ [Thu Sep 18 09:33:12 CST 2025] Inference Start
+ [Thu Sep 18 09:33:12 CST 2025] Inference End
+ [Thu Sep 18 10:48:45 CST 2025] Inference Start
+ [Thu Sep 18 10:48:45 CST 2025] Inference End
+ [Thu Sep 18 12:38:00 CST 2025] Inference Start
+ [Thu Sep 18 12:38:00 CST 2025] Inference End
+ [Thu Sep 18 14:29:12 CST 2025] Merging Checkpoints
+ [Thu Sep 18 14:31:09 CST 2025] Merging Checkpoints End
+ [Thu Sep 18 14:31:09 CST 2025] Inference End
+ [Thu Sep 18 14:34:00 CST 2025] Merging Checkpoints
+ [Thu Sep 18 14:34:25 CST 2025] Merging Checkpoints
+ [Thu Sep 18 14:36:15 CST 2025] Merging Checkpoints
+ [Thu Sep 18 14:37:01 CST 2025] Inference Start
+ [Thu Sep 18 14:37:01 CST 2025] Inference End
+ [Thu Sep 18 14:37:29 CST 2025] Inference Start
+ [Thu Sep 18 14:37:29 CST 2025] Inference End
+ [Thu Sep 18 14:38:31 CST 2025] Inference Start
+ [Thu Sep 18 14:38:31 CST 2025] Inference End
+ [Thu Sep 18 14:41:21 CST 2025] Inference Start
+ [Thu Sep 18 14:41:21 CST 2025] Inference End
+ [Thu Sep 18 14:43:00 CST 2025] Inference Start
+ [Thu Sep 18 14:43:00 CST 2025] Inference End
+ [Thu Sep 18 17:23:43 CST 2025] Inference Start
+ [Thu Sep 18 17:23:43 CST 2025] Inference End
+ [Thu Sep 18 17:24:30 CST 2025] Inference Start
+ [Thu Sep 18 17:24:30 CST 2025] Inference End
preprocess_robust.py ADDED
@@ -0,0 +1,39 @@
+ import argparse
+ import os
+
+
+ def main(args):
+     src_lang, tgt_lang = args.lang_pair.split("-")
+     tgt_file_list = [file for file in os.listdir(args.tgt_path) if file.endswith(f".{tgt_lang}")]
+     for tgt_file in tgt_file_list:
+         src_file = os.path.splitext(tgt_file)[0]
+         doc_id = src_file.split('.')[-1]
+         label_file = src_file.replace(f".{src_lang}.", ".id.")
+         with open(os.path.join(args.disturb_src_path, label_file), "r", encoding="utf-8") as f:
+             labels = [line.strip() for line in f]
+         with open(os.path.join(args.tgt_path, tgt_file), "r", encoding="utf-8") as f:
+             tgt_lines = [line.strip() for line in f]
+
+         assert len(labels) == len(tgt_lines), f"Length mismatch in {src_file} and {label_file}"
+         filtered_tgt_lines = [tgt for tgt, label in zip(tgt_lines, labels) if label.split('-')[0] == doc_id]
+
+         with open(os.path.join(args.original_src_path, src_file), "r", encoding="utf-8") as f:
+             original_src_lines = [line.strip() for line in f]
+         assert len(original_src_lines) == len(filtered_tgt_lines), f"Length mismatch in {src_file} and filtered {tgt_file}"
+
+         with open(os.path.join(args.output_path, tgt_file), "w", encoding="utf-8") as f:
+             f.write("\n".join(filtered_tgt_lines) + "\n")
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--original_src_path", type=str)
+     parser.add_argument("--disturb_src_path", type=str)
+     parser.add_argument("--tgt_path", type=str)
+     parser.add_argument("--output_path", type=str)
+     parser.add_argument("--lang_pair", type=str)
+     args = parser.parse_args()
+
+     os.makedirs(args.output_path, exist_ok=True)
+
+     main(args)
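run_eval_robust.sh below invokes this as follows (level and step illustrative). The `.id.` label files in the disturbed source directory map each translated line back to a source document, and only lines whose label matches the document ID are kept:

    python preprocess_robust.py \
        --original_src_path /data/wyt/codes/DocDPO/data/2017-01-ted-test/en-zh \
        --disturb_src_path /data/wyt/codes/DocDPO/data/ted_robust/level_2/en-zh \
        --tgt_path results_robust/level_2/en-zh_1200 \
        --output_path results_robust/level_2/en-zh_1200/tmp_robust \
        --lang_pair en-zh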
qwen2.5_full_sft.yaml ADDED
@@ -0,0 +1,50 @@
+ ### model
+ model_name_or_path: /data/wyt/codes/checkpoints/Qwen2.5-7B-Instruct
+ trust_remote_code: true
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: full
+ deepspeed: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+
+ ### dataset
+ dataset_dir: /data/wyt/codes/DocDPO/sft/data_multilang/red_multilang_base_balanced_en_zhdefr_320
+ dataset: sft_en-zh_tool,sft_en-zh_trans_base_sample,sft_en-de_tool,sft_en-de_trans_base_sample,sft_en-fr_tool,sft_en-fr_trans_base_sample
+ template: qwen
+ cutoff_len: 2560
+ # max_samples: 1000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+ dataloader_num_workers: 4
+ mask_history: true
+
+ ### output
+ output_dir: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft
+ logging_steps: 5
+ # save_steps: 300
+ save_strategy: epoch
+ plot_loss: true
+ overwrite_output_dir: true
+ save_only_model: true
+ report_to: tensorboard # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+ ### train
+ per_device_train_batch_size: 4
+ gradient_accumulation_steps: 2
+ # learning_rate: 2.0e-5
+ # learning_rate: 8.0e-6
+ learning_rate: 1.0e-5
+ num_train_epochs: 1.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
+ resume_from_checkpoint: null
+
+ ### eval
+ # eval_dataset: alpaca_en_demo
+ # val_size: 0.1
+ # per_device_eval_batch_size: 1
+ # eval_strategy: steps
+ # eval_steps: 500
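The launch command for this config appears (commented out) in run_train.sh below:

    CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_full_sft.yaml > logs/train_sft.log 2>&1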
qwen2.5_lora_dpo.yaml ADDED
@@ -0,0 +1,50 @@
+ ### model
+ model_name_or_path: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft
+ trust_remote_code: true
+
+ ### method
+ stage: dpo
+ do_train: true
+ # finetuning_type: full
+ finetuning_type: lora
+ lora_rank: 8
+ lora_target: all
+ # deepspeed: /data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+
+ ### dataset
+ dataset_dir: /data/wyt/codes/DocDPO/sft/data_multilang/red_multilang_base_balanced_en_zhdefr_320
+ dataset: dpo_en-zh_tool,dpo_en-zh_trans_base_sample,dpo_en-de_tool,dpo_en-de_trans_base_sample,dpo_en-fr_tool,dpo_en-fr_trans_base_sample
+ template: qwen
+ cutoff_len: 2560
+ # max_samples: 1000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+ dataloader_num_workers: 4
+
+ ### output
+ output_dir: /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/adapter
+ logging_steps: 5
+ save_steps: 200
+ plot_loss: true
+ overwrite_output_dir: true
+ save_only_model: false
+ report_to: tensorboard # choices: [none, wandb, tensorboard, swanlab, mlflow]
+
+ ### train
+ per_device_train_batch_size: 2
+ gradient_accumulation_steps: 4
+ # learning_rate: 1.0e-4
+ learning_rate: 5.0e-6
+ num_train_epochs: 2.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
+ resume_from_checkpoint: null
+
+ ### eval
+ # eval_dataset: alpaca_en_demo
+ # val_size: 0.1
+ # per_device_eval_batch_size: 1
+ # eval_strategy: steps
+ # eval_steps: 500
run_eval_cohesion.sh ADDED
@@ -0,0 +1,41 @@
+ #!/bin/bash
+
+
+ dir_path=$1
+
+ lang=$2
+ src_lang=${lang%%-*}
+ tgt_lang=${lang##*-}
+
+ data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+
+ for i in {0..11}; do
+     # source=$data_path/test.en.$i
+     target=$dir_path/IWSLT17.TED.tst2017.${lang}.${src_lang}.$i.${tgt_lang}
+     reference=$data_path/IWSLT17.TED.tst2017.${lang}.${tgt_lang}.$i
+     result=$dir_path/cohesion.txt
+
+     # echo $target
+
+     python -u /data/wyt/codes/DocDPO/evaluator/fine_grained_multi_demensional/eval_cohesion.py \
+         --model gpt-4.1 \
+         --input_file $target \
+         --reference_file $reference \
+         --target_language $tgt_lang \
+         --output_file $result
+ done
+
+ python /data/wyt/codes/DocDPO/evaluator/fine_grained_multi_demensional/calc_avg_cohesion.py $dir_path/cohesion.txt
+
+ # cd $dir_path/aligned
+ # file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+ # echo $file_nums
+
+ # cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+ # cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+ # sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+ #     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+ # cd $work_dir
+ # python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
run_eval_comet_api.sh ADDED
@@ -0,0 +1,40 @@
+ #!/bin/bash
+
+
+ # lang=en-de
+ # lang=en-zh
+ lang=$2
+ src_lang=${lang%%-*}
+ tgt_lang=${lang##*-}
+
+ dir_path=$1
+
+ data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+ # align_script_path=/data/wyt/codes/DocMTAgent/Bleualign
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc2/en-zh
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1/en-zh
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1_trans14b/en-zh
+ echo $dir_path
+
+ for i in {0..11}; do
+     source=$data_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i
+     target=$dir_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i.$tgt_lang
+     reference=$data_path/IWSLT17.TED.tst2017.$lang.$tgt_lang.$i
+     # python comet_api.py -s $source -t $target -r $reference -u 10.249.45.139:8088 >> $dir_path/comet_api.txt
+     python comet_api.py -s $source -t $target -r $reference -u 10.249.42.177:8088 >> $dir_path/comet_api.txt
+ done
+
+ python calc_avg_comet.py $dir_path/comet_api.txt
+
+ # cd $dir_path/aligned
+ # file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+ # echo $file_nums
+
+ # cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+ # cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+ # sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+ #     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+ # cd $work_dir
+ # python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
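Illustrative invocation ($1 is the directory holding the translated documents, $2 the language pair; a COMET service must be reachable at the host:port hard-coded above):

    bash run_eval_comet_api.sh results/en-zh_600 en-zh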
run_eval_robust.sh ADDED
@@ -0,0 +1,47 @@
+ #!/bin/bash
+
+
+ # lang=en-de
+ # lang=en-zh
+ lang=$2
+ src_lang=${lang%%-*}
+ tgt_lang=${lang##*-}
+
+ dir_path=$1
+ level=$3
+
+ data_path=/data/wyt/codes/DocDPO/data/2017-01-ted-test/$lang
+ # align_script_path=/data/wyt/codes/DocMTAgent/Bleualign
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc2/en-zh
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1/en-zh
+ # dir_path=/data/wyt/codes/DocDPO/inference_mcts_shorten_rag_entity_tldr_vq_lowerbound_comet/results/window10_epc1_trans14b/en-zh
+ echo $dir_path
+
+ python preprocess_robust.py \
+     --original_src_path /data/wyt/codes/DocDPO/data/2017-01-ted-test/${src_lang}-${tgt_lang} \
+     --disturb_src_path /data/wyt/codes/DocDPO/data/ted_robust/level_${level}/${src_lang}-${tgt_lang} \
+     --tgt_path results_robust/level_${level}/${src_lang}-${tgt_lang}_1200 \
+     --output_path results_robust/level_${level}/${src_lang}-${tgt_lang}_1200/tmp_robust \
+     --lang_pair $lang
+
+ for i in {0..11}; do
+     source=$data_path/IWSLT17.TED.tst2017.$lang.$src_lang.$i
+     target=results_robust/level_${level}/${src_lang}-${tgt_lang}_1200/tmp_robust/IWSLT17.TED.tst2017.$lang.$src_lang.$i.$tgt_lang
+     reference=$data_path/IWSLT17.TED.tst2017.$lang.$tgt_lang.$i
+     python comet_api.py -s $source -t $target -r $reference -u 127.0.0.1:8088 >> $dir_path/comet_api.txt
+ done
+
+ python calc_avg_comet.py $dir_path/comet_api.txt
+
+ # cd $dir_path/aligned
+ # file_nums=$(ls test.*-s | sort -n -t . -k 2 | xargs wc -l | head -n -1 | awk '{ print $1 }')
+ # echo $file_nums
+
+ # cat $(ls test.*-t | sort -n -t . -k 2) > $dir_path/whole.hyp
+ # cat $(ls test.*-s | sort -n -t . -k 2) > $dir_path/whole.src
+
+ # sh /data/wyt/codes/DocMTAgent/consistency_evaluation/run_eval_sep.sh \
+ #     $dir_path/whole.src $dir_path/whole.hyp $dir_path/consistency $lang "$file_nums"
+
+ # cd $work_dir
+ # python indiversity_sep.py -r $dir_path/whole.src.record_sep.json | tee -a $dir_path/whole.src.consistency_sep
run_merge_fix.sh ADDED
@@ -0,0 +1,24 @@
+ #!/bin/bash
+
+ template_config="merge_template.yaml"
+ output_config="merge.yaml"
+ adapter_dir="dpo/adapter"
+ merged_dir="dpo/merged"
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ted_react_sft_balanced_428/checkpoint-600
+ # model_path=/data/wyt/codes/DocDPO/sft/checkpoints_llama_factory/ted_react_sft_trans_base_sample_balanced_428/checkpoint-600
+ model_path=$1
+
+ for dir in $adapter_dir/checkpoint-*; do
+     ckpt=$(basename $dir)
+     echo $ckpt
+     num=${ckpt#checkpoint-}
+     echo $num
+     if { [ -z "$2" ] && [ -z "$3" ]; } || { [ "$num" -ge "$2" ] && [ "$num" -le "$3" ]; }; then
+         mkdir -p $merged_dir/$ckpt
+         cp $template_config $merged_dir/$ckpt/$output_config
+         sed -i "s|adapter_name_or_path:.*|adapter_name_or_path: $adapter_dir/$ckpt|" "$merged_dir/$ckpt/$output_config"
+         sed -i "s|model_name_or_path:.*|model_name_or_path: $model_path|" "$merged_dir/$ckpt/$output_config"
+         sed -i "s|export_dir:.*|export_dir: $merged_dir/$ckpt|" "$merged_dir/$ckpt/$output_config"
+         llamafactory-cli export $merged_dir/$ckpt/$output_config
+     fi
+ done
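run_train.sh below calls this (commented out) to merge only the adapters in a checkpoint range:

    CUDA_VISIBLE_DEVICES=0 zsh run_merge_fix.sh /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft 600 800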
run_train.sh ADDED
@@ -0,0 +1,26 @@
+
+
+ source ~/.zshrc
+ conda activate llama-factory
+
+ # echo "[$(date)] SFT Training Start"
+ # CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_full_sft.yaml > logs/train_sft.log 2>&1 && \
+ # echo "[$(date)] SFT Training End"
+
+ # echo "[$(date)] DPO Training Start"
+ # CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_lora_dpo.yaml > logs/train_dpo.log 2>&1 && \
+ # echo "[$(date)] DPO Training End"
+
+ # echo "[$(date)] Merging Checkpoints"
+ # CUDA_VISIBLE_DEVICES=0 zsh run_merge_fix.sh /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft 600 800 > logs/merge.log 2>&1 && \
+ # echo "[$(date)] Merging Checkpoints End"
+
+ # conda activate optima-vllm
+
+ echo "[$(date)] Inference Start"
+ CUDA_VISIBLE_DEVICES=0 zsh infer.sh 0 true 600 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-600 en-de > logs/infer_600_en-de.log 2>&1 &
+ CUDA_VISIBLE_DEVICES=1 zsh infer.sh 1 true 600 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-600 en-fr > logs/infer_600_en-fr.log 2>&1 &
+ CUDA_VISIBLE_DEVICES=2 zsh infer.sh 2 true 800 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-800 en-fr > logs/infer_800_en-fr.log 2>&1 &
+ CUDA_VISIBLE_DEVICES=3 zsh infer.sh 3 true 1000 /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1000 en-fr > logs/infer_1000_en-fr.log 2>&1 &
+ wait  # the four jobs above run in the background; block until they all finish
+ echo "[$(date)] Inference End"
test_api.py ADDED
@@ -0,0 +1,28 @@
+ from openai import OpenAI
+ import sys
+
+
+ def main():
+
+     openai_api_key = "EMPTY"
+     api_base = f"http://{sys.argv[1]}/v1"
+
+     client = OpenAI(
+         api_key=openai_api_key,
+         base_url=api_base,
+     )
+
+     try:
+         completion = client.chat.completions.create(
+             model='qwen',
+             messages=[{'role': 'user', 'content': 'Hello!'}],
+             timeout=15
+         )
+
+         print(completion.choices[0].message.content)
+     except Exception:
+         sys.exit(1)
+
+
+ if __name__ == '__main__':
+     main()
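Illustrative probe of a local vLLM server (address as in infer.sh); exit status 0 means the endpoint answered:

    python test_api.py 127.0.0.1:8010 && echo "API up"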
vllm_1000.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_1000_2.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_1200.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_1400.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_1600.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_1800.log ADDED
@@ -0,0 +1,158 @@
+ INFO 09-18 14:31:14 [__init__.py:241] Automatically detected platform cuda.
+ (APIServer pid=3508930) INFO 09-18 14:31:16 [api_server.py:1805] vLLM API server version 0.10.1.1
+ (APIServer pid=3508930) INFO 09-18 14:31:16 [utils.py:326] non-default args: {'model_tag': '/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', 'host': '0.0.0.0', 'port': 8011, 'model': '/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', 'served_model_name': ['qwen'], 'enable_prefix_caching': True}
+ (APIServer pid=3508930) INFO 09-18 14:31:22 [__init__.py:711] Resolved architecture: Qwen2ForCausalLM
+ (APIServer pid=3508930) `torch_dtype` is deprecated! Use `dtype` instead!
+ (APIServer pid=3508930) INFO 09-18 14:31:22 [__init__.py:1750] Using max model len 32768
+ (APIServer pid=3508930) INFO 09-18 14:31:23 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=8192.
+ INFO 09-18 14:31:27 [__init__.py:241] Automatically detected platform cuda.
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:29 [core.py:636] Waiting for init message from front-end.
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:29 [core.py:74] Initializing a V1 LLM engine (v0.10.1.1) with config: model='/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', speculative_config=None, tokenizer='/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=qwen, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, pooler_config=None, compilation_config={"level":3,"debug_dump_path":"","cache_dir":"","backend":"","custom_ops":[],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output","vllm.mamba_mixer2"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":1,"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"pass_config":{},"max_capture_size":512,"local_cache_dir":null}
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:30 [parallel_state.py:1134] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
+ (EngineCore_0 pid=3509752) WARNING 09-18 14:31:30 [topk_topp_sampler.py:61] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:30 [gpu_model_runner.py:1953] Starting to load model /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged/checkpoint-1800...
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:30 [gpu_model_runner.py:1985] Loading model from scratch...
+ (EngineCore_0 pid=3509752) INFO 09-18 14:31:30 [cuda.py:328] Using Flash Attention backend on V1 engine.
+ (EngineCore_0 pid=3509752)
+ (EngineCore_0 pid=3509752)
+ (EngineCore_0 pid=3509748)
+ (EngineCore_0 pid=3509744)
+ (EngineCore_0 pid=3509748)
+ (EngineCore_0 pid=3509748)
+ (EngineCore_0 pid=3509748)
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:37 [default_loader.py:262] Loading weights took 6.39 seconds
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:37 [gpu_model_runner.py:2007] Model loading took 14.2488 GiB and 6.587665 seconds
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:44 [backends.py:548] Using cache directory: /data/wyt/.cache/vllm/torch_compile_cache/1fe949e292/rank_0_0/backbone for vLLM's torch.compile
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:44 [backends.py:559] Dynamo bytecode transform time: 6.39 s
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:49 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 4.697 s
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:52 [monitor.py:34] torch.compile takes 6.39 s in total
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:53 [gpu_worker.py:276] Available KV cache memory: 51.38 GiB
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:53 [kv_cache_utils.py:849] GPU KV cache size: 962,112 tokens
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:53 [kv_cache_utils.py:853] Maximum concurrency for 32,768 tokens per request: 29.36x
+ (EngineCore_0 pid=3509744)
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:56 [gpu_model_runner.py:2708] Graph capturing finished in 3 secs, took 1.56 GiB
+ (EngineCore_0 pid=3509744) INFO 09-18 14:31:56 [core.py:214] init engine (profile, create kv cache, warmup model) took 19.28 seconds
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [loggers.py:142] Engine 000: vllm cache_config_info with initialization after num_gpu_blocks is: 60132
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [api_server.py:1611] Supported_tasks: ['generate']
+ (APIServer pid=3508927) WARNING 09-18 14:31:57 [__init__.py:1625] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [serving_responses.py:120] Using default chat sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [serving_chat.py:134] Using default chat sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [serving_completion.py:77] Using default completion sampling params from model: {'repetition_penalty': 1.05, 'temperature': 0.7, 'top_k': 20, 'top_p': 0.8}
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [api_server.py:1880] Starting vLLM API server 0 on http://0.0.0.0:8012
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:36] Available routes are:
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /openapi.json, Methods: HEAD, GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /docs, Methods: HEAD, GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /docs/oauth2-redirect, Methods: HEAD, GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /redoc, Methods: HEAD, GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /health, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /load, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /ping, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /ping, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /tokenize, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /detokenize, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/models, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /version, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses/{response_id}, Methods: GET
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/responses/{response_id}/cancel, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/chat/completions, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/completions, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/embeddings, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /pooling, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /classify, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /score, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/score, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/audio/transcriptions, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/audio/translations, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /rerank, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v1/rerank, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /v2/rerank, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /scale_elastic_ep, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /is_scaling_elastic_ep, Methods: POST
+ (APIServer pid=3508927) INFO 09-18 14:31:57 [launcher.py:44] Route: /invocations, Met(APIServer pid=3508933) INFO 09-18 14:32:00 [chat_utils.py:470] Detected the chat template content format to (APIServer pid=3508930) INFO 09-18 14:32:00 [chat_utils.py:470] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to ov(APIServer pid=3508933) INFO 09-18 14:32:07 [loggers.py:123] Engine 000: Avg prompt throughput: 47.2 tokens/s(APIServer pid=3508930) INFO 09-18 14:32:07 [loggers.py:123] Engine 000: Avg prompt throughput: 47.2 tokens/s, Avg gene(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:32:17 [loggers.py:123] Engine 000: Avg prompt throughput: 608.0 tokens/(APIServer pid=3508930) INFO 09-18 14:32:17 [loggers.py:123] Engine 000: Avg prompt throughput: 608.0 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:32:27 [loggers.py:123] Engine 000: Avg prompt throughput: 195.2 tokens/(APIServer pid=3508930) INFO 09-18 14:32:27 [loggers.py:123] Engine 000: Avg prompt throughput: 195.2 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:32:37 [loggers.py:123] Engine 000: Avg prompt throughput: 325.8 tokens/(APIServer pid=3508930) INFO 09-18 14:32:37 [loggers.py:123] Engine 000: Avg prompt throughput: 325.8 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:39414 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:32:47 [loggers.py:123] Engine 000: Avg prompt throughput: 694.0 tokens/(APIServer pid=3508930) INFO 09-18 14:32:47 [loggers.py:123] Engine 000: Avg prompt throughput: 694.1 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:48072 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:41594 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508930) INFO 09-18 14:32:57 [loggers.py:123] Engine 000: Avg prompt throughput: 373.7 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:33:07 [loggers.py:123] Engine 000: Avg prompt throughput: 249.6 tokens/(APIServer pid=3508930) INFO 09-18 14:33:07 [loggers.py:123] Engine 000: Avg prompt throughput: 209.7 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:45372 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:45372 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:33:17 [loggers.py:123] Engine 000: Avg prompt throughput: 356.6 tokens/(APIServer pid=3508930) INFO 09-18 14:33:17 [loggers.py:123] Engine 000: Avg prompt throughput: 356.6 tokens/s, Avg generation throughput: 84.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%, Prefix cache hit(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO 09-18 14:33:27 [loggers.py:123] Engine 000: Avg prompt throughput: 786.5 tokens/(APIServer pid=3508930) INFO 09-18 14:33:27 [loggers.py:123] Engine 000: Avg prompt throughput: 786.5 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:54502 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:33:37 [loggers.py:123] Engine 000: Avg prompt throughput: 253.4 tokens/(APIServer pid=3508930) INFO 09-18 14:33:37 [loggers.py:123] Engine 000: Avg prompt throughput: 253.4 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:54502 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO 09-18 14:33:47 [loggers.py:123] Engine 000: Avg prompt throughput: 376.9 tokens/s, Avg generation throughput: 81.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%, Prefix cache hit(APIServer pid=3508933) INFO: 127.0.0.1:36442 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO 09-18 14:33:57 [loggers.py:123] Engine 000: Avg prompt throughput: 848.9 tokens/(APIServer pid=3508930) INFO 09-18 14:33:57 [loggers.py:123] Engine 000: Avg prompt throughput: 849.0 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APISe(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO: 127.0.0.1:35254 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ (APIServer pid=3508933) INFO 09-18 14:34:07 [loggers.py:123] Engine 000: Avg prompt throughput: 269.5 tokens/(APIServer pid=3508930) INFO 09-18 14:34:07 [loggers.py:123] Engine 000: Avg prompt throughput: 269.5 tokens/s, Avg gener(APIServer pid=3508933) INFO: 127.0.0.1:56172 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ rver pid=3508(APIServer pid=3508930) INFO: 127.0.0.1:32922 - "POST /v1/chat/completions HTTP/1.1" 200 OK
+ tion throughput: 84.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.1%, Prefix cache hit rate: 57.6%
+ (APIServer pid=3508927) INFO: 127.0.0.1:58182 - "POST /v1/chat/completions HTTP/1.1" 200 OK
vllm_600.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_600_2.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_800.log ADDED
The diff for this file is too large to render. See raw diff
 
vllm_800_2.log ADDED
The diff for this file is too large to render. See raw diff