set -eux

LLM_RECIPES_DIR=/code/llm-recipes
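# tokens.sh is assumed to export the API credentials (e.g. an HF access token)
# required to load the model; its contents are not shown here.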
source "${LLM_RECIPES_DIR}/scripts/wmt2024/tokens.sh"
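
# Process input splits 07-09 in parallel, one background job per split;
# split i is assigned to GPU (i - 5), i.e. GPUs 2, 3 and 4.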
for i in $(seq 7 9); do
    GPU_ID=$((i - 5))
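    # Nucleus sampling (top-p 0.95), returning 50 sampled translations per input.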
    python "${LLM_RECIPES_DIR}/tools/hf_inference.py" \
        --model /work/models/additiona_trained_hf/llama2-en-ja-continuous-pretrained-v0-dev-finetune-chunked-docs-all-averaged-841-845 \
        -i /work/wmt2024_test/LLM/split/en-ja/wmttest2024.src.sentence_splited.with_template.en-ja.en.jsonl.0${i} \
        -o /work/translation/wmt24_test/en-ja/llama2-top-p-0.95/split_0${i} \
        -g ${GPU_ID} \
        -b 158 \
        --attn_implementation sdpa \
        --dynamic_max_new_token_ratio 3.0 \
        --num_return_sequences 50 \
        --do_sample \
        --top_p 0.95 \
        --max_input_tokens 158 &
done
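
# Block until all three background inference jobs have finished.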
wait