| |
| # Start vLLM's OpenAI-compatible API server for the local gemma-3-4b-it checkpoint |
| # under support_checking_bn, exposed as model name "support-check" on port 8090. |
| # Pinned to GPU 0 (PCI bus ordering). The "classifier" launch in this file is also |
| # pinned to GPU 0 and both use --gpu-memory-utilization 0.47, presumably so the two |
| # servers fit on the same card; confirm before raising either value. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 python3 -m vllm.entrypoints.openai.api_server \ |
| --model /home/mshahidul/readctrl_model/support_checking_bn/gemma-3-4b-it \ |
| --served-model-name support-check \ |
| --port 8090 \ |
| --dtype bfloat16 \ |
| --trust-remote-code \ |
| --tensor-parallel-size 1 \ |
| --gpu-memory-utilization 0.47 \ |
| --max-model-len 8192 \ |
| --max-num-seqs 256 \ |
| --enable-prefix-caching |
| |
|
|
|
|
|
|
| |
| # Start vLLM's OpenAI-compatible API server for the local gemma-3-4b-it checkpoint |
| # under text_classifier_bn, exposed as model name "classifier" on port 8040. |
| # Pinned to GPU 0 (PCI bus ordering). The "support-check" launch in this file uses |
| # the same GPU and the same 0.47 memory fraction, presumably so both servers can |
| # run side by side on one card; confirm before raising either value. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 python3 -m vllm.entrypoints.openai.api_server \ |
| --model /home/mshahidul/readctrl_model/text_classifier_bn/gemma-3-4b-it \ |
| --served-model-name classifier \ |
| --port 8040 \ |
| --dtype bfloat16 \ |
| --trust-remote-code \ |
| --tensor-parallel-size 1 \ |
| --gpu-memory-utilization 0.47 \ |
| --max-model-len 8192 \ |
| --max-num-seqs 256 \ |
| --enable-prefix-caching |
|
|
| |
| # Start vLLM's OpenAI-compatible API server for Qwen/Qwen3-30B-A3B-Instruct-2507, |
| # exposed as model name "subclaim-extractor" on port 8051, pinned to GPU 3 |
| # (PCI bus ordering) with a 0.9 GPU memory fraction and a 16384-token context. |
| # The gemma-3-27b-it launch in this file registers the same served model name on |
| # port 8052, so clients select between the two by port only. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=3 python3 -m vllm.entrypoints.openai.api_server \ |
| --model Qwen/Qwen3-30B-A3B-Instruct-2507 \ |
| --served-model-name subclaim-extractor \ |
| --port 8051 \ |
| --trust-remote-code \ |
| --tensor-parallel-size 1 \ |
| --gpu-memory-utilization 0.9 \ |
| --max-model-len 16384 \ |
| --enable-prefix-caching |
|
|
| |
| # Start vLLM's OpenAI-compatible API server for google/gemma-3-27b-it, exposed as |
| # model name "subclaim-extractor" on port 8052, pinned to GPU 5 (PCI bus ordering) |
| # with a 0.9 GPU memory fraction and a 16384-token context. |
| # The Qwen3-30B-A3B launch in this file registers the same served model name on |
| # port 8051, so clients select between the two by port only. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=5 python3 -m vllm.entrypoints.openai.api_server \ |
| --model google/gemma-3-27b-it \ |
| --served-model-name subclaim-extractor \ |
| --port 8052 \ |
| --trust-remote-code \ |
| --tensor-parallel-size 1 \ |
| --gpu-memory-utilization 0.9 \ |
| --max-model-len 16384 \ |
| --enable-prefix-caching |
| |
|
|
| |
| |
| # Serve Qwen/Qwen3-30B-A3B-Instruct-2507 with `vllm serve` as model name "newclaw" |
| # on port 8095, pinned to GPU 1 (PCI bus ordering), with automatic tool-call |
| # parsing (qwen3_xml) and a reasoning parser (deepseek_r1). |
| # |
| # Every option line except the last must end in a backslash: a missing one ends the |
| # command early, and the remaining options (including --port) never reach vLLM. |
| # |
| # --enable-reasoning is intentionally not passed: vLLM deprecated it in favour of |
| # --reasoning-parser alone (which turns reasoning mode on by itself) and later |
| # releases reject the flag. |
| # |
| # NOTE(review): this is an Instruct (non-thinking) checkpoint; confirm a reasoning |
| # parser is wanted at all. The deepseek_r1 parser may route a reply that has no |
| # closing think tag entirely into the reasoning field, leaving content empty. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=1 vllm serve Qwen/Qwen3-30B-A3B-Instruct-2507 \ |
| --max-model-len 16384 \ |
| --served-model-name newclaw \ |
| --enable-expert-parallel \ |
| --tensor-parallel-size 1 \ |
| --enable-auto-tool-choice \ |
| --tool-call-parser qwen3_xml \ |
| --dtype bfloat16 \ |
| --gpu-memory-utilization 0.9 \ |
| --port 8095 \ |
| --reasoning-parser deepseek_r1 |
|
|
|
|
| |
| # Run extract_bn_subclaims_vllm.py on the (0_1000) shard of the en2bn test file, |
| # passing --port 8050 (presumably the port of the vLLM server it should query). |
| # NOTE(review): none of the server launches visible in this file listens on 8050 |
| # (they use 8090, 8040, 8051, 8052 and 8095); confirm a server is started on 8050 |
| # elsewhere before running this. |
| python3 /home/mshahidul/readctrl/code/finetune-inference/subclaim_support_extraction/extract_bn_subclaims_vllm.py \
| --input_file "/home/mshahidul/readctrl/data/translated_data/translation_testing_3396/multiclinsum_test_en2bn_gemma(0_1000)_3396.json" \
| --port 8050
|
|
| # Run extract_bn_subclaims_vllm.py on the (1000_2000) shard of the en2bn test file, |
| # passing --port 8051 (the port used by the Qwen3-30B-A3B "subclaim-extractor" |
| # launch in this file; presumably the script queries that server). |
| python3 /home/mshahidul/readctrl/code/finetune-inference/subclaim_support_extraction/extract_bn_subclaims_vllm.py \
| --input_file "/home/mshahidul/readctrl/data/translated_data/translation_testing_3396/multiclinsum_test_en2bn_gemma(1000_2000)_3396.json" \
| --port 8051
|
|
| # Run extract_bn_subclaims_vllm.py on the (2000_3396) shard of the en2bn test file, |
| # passing --port 8052 (the port used by the gemma-3-27b-it "subclaim-extractor" |
| # launch in this file; presumably the script queries that server). |
| python3 /home/mshahidul/readctrl/code/finetune-inference/subclaim_support_extraction/extract_bn_subclaims_vllm.py \
| --input_file "/home/mshahidul/readctrl/data/translated_data/translation_testing_3396/multiclinsum_test_en2bn_gemma(2000_3396)_3396.json" \
| --port 8052