# Inference configuration: SentencePiece subword transform, an AWQ-quantized
# Mistral-instruct checkpoint, and greedy (beam_size 1) decoding settings.
# NOTE(review): file paths below are relative; presumably resolved against the
# working directory of the consuming tool -- confirm.

transforms: [sentencepiece]

# Subword
# Source and target sides share the same SentencePiece model file.
src_subword_model: "tokenizer.model"
tgt_subword_model: "tokenizer.model"

# Model info
model: "mistral-instruct-v0.2-onmt-awq-gemm.pt"

# Inference
seed: 42
max_length: 256
gpu: 0
batch_type: sents
batch_size: 60
world_size: 1
gpu_ranks: [0]
# Optional multi-GPU / on-the-fly quantization settings, disabled here.
# NOTE(review): the checkpoint name suggests it is already AWQ-quantized,
# which may be why quant_* is left commented out -- confirm.
#parallel_mode: "tensor_parallel"
#quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
#quant_type: "bnb_NF4"
precision: fp16
# Sampling options, disabled here.
#random_sampling_topk: 1
#random_sampling_topp: 0.6
#random_sampling_temp: 0.9
beam_size: 1
n_best: 1
profile: false
report_time: true
# NOTE(review): a plain `None` is loaded by YAML as the string "None", not as
# null. Presumably a placeholder that is overridden at invocation -- confirm
# before changing it.
src: None
#tgt: None