---
# OpenNMT-py inference configuration for a Mistral-Instruct v0.2 model
# quantized with AWQ (GEMM kernels).
#
# NOTE(review): the original file had every key on one physical line; after
# `transforms: [sentencepiece]` the rest of the line was a YAML comment, so
# only `transforms` was actually parsed. Restored to one key per line.

#### Subword
transforms: [sentencepiece]
src_subword_model: "tokenizer.model"
tgt_subword_model: "tokenizer.model"

# Model info
model: "mistral-instruct-v0.2-onmt-awq-gemm.pt"

# Inference
seed: 42
max_length: 256
gpu: 0
batch_type: sents
batch_size: 60
world_size: 1
gpu_ranks: [0]
# parallel_mode: "tensor_parallel"
# quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
# quant_type: "bnb_NF4"
precision: fp16
# Sampling is disabled; greedy decoding via beam_size: 1 below.
# random_sampling_topk: 1
# random_sampling_topp: 0.6
# random_sampling_temp: 0.9
beam_size: 1
n_best: 1
profile: false
report_time: true
# NOTE(review): `None` parses as the literal string "None", not YAML null —
# presumably a placeholder overridden on the command line; confirm the
# consumer expects this, otherwise write `src: null` or a real path.
src: None
# tgt: None