# Source: mixtral-onmt-awq-gemv / mixtral-inference-awq.yaml
# Uploaded by vince62s, commit 39e20f2 ("Upload 2 files")
# Inference configuration for a Mixtral checkpoint (AWQ, judging by the
# checkpoint filename). Key names appear to follow OpenNMT-py's translate
# options; confirm against the consuming tool before renaming any of them.

# Tokenization: one SentencePiece model file is referenced for both the
# source and the target side.
transforms:
  - sentencepiece
src_subword_model: '/mnt/InternalCrucial4/dataAI/mixtral/tokenizer.model'
tgt_subword_model: '/mnt/InternalCrucial4/dataAI/mixtral/tokenizer.model'

# Checkpoint to load.
model: '/mnt/InternalCrucial4/dataAI/mixtral/mixtral-onmt-awq.pt'

# Devices: two ranks (0 and 1) in tensor-parallel mode, fp16 precision.
# NOTE(review): `gpu: 0` coexists with the two-rank setup; how the consumer
# reconciles the two is not visible here. Verify before editing either.
gpu: 0
world_size: 2
gpu_ranks:
  - 0
  - 1
parallel_mode: 'tensor_parallel'
precision: fp16

# Reproducibility, batching and output length.
seed: 42
batch_type: sents
batch_size: 1
max_length: 256

# Decoding: beam size 1, single best hypothesis. The sampling options are
# kept for reference but left disabled.
beam_size: 1
n_best: 1
# random_sampling_topk: 1
# random_sampling_topp: 0.6
# random_sampling_temp: 0.9

# Diagnostics.
profile: false
report_time: true

# Input/output files.
# NOTE(review): an unquoted `None` is the plain string "None" in YAML, not a
# null. Presumably a placeholder that the caller overrides; confirm before
# replacing it with `null` or removing the key.
src: None
# tgt: None