|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Top-level model settings: the model name and backend are both "turbomind",
# the default model file name is "weights", and max_batch_size is 1.
# NOTE(review): the field names match a Triton Inference Server model
# configuration (protobuf text format) - confirm against the deployment.
name: "turbomind" |
|
backend: "turbomind" |
|
default_model_filename: "weights" |
|
max_batch_size: 1 |
|
|
|
# Transaction policy with the decoupled flag switched on.
# NOTE(review): presumably enabled so the backend can stream several
# responses per request - confirm against the backend's requirements.
# NOTE(review): the boolean is spelled `True` here but `true` in the
# `optional` fields of the input list.
model_transaction_policy { |
|
decoupled: True |
|
} |
|
|
|
# A single instance group: 48 instances of kind KIND_CPU.
# NOTE(review): the count presumably bounds how many requests are served
# concurrently - confirm before changing it.
instance_group [ |
|
{ |
|
|
|
count: 48 |
|
kind: KIND_CPU |
|
} |
|
] |
|
|
|
# Input tensor declarations.
# - input_ids, input_lengths and request_output_len carry no `optional`
#   flag; every other entry below is marked `optional: true`.
# - Entries declared with `dims: [ 1 ]` plus `reshape: { shape: [ ] }` are
#   single-element tensors reshaped to an empty shape.
# - `-1` in `dims` marks a variable-size dimension.
input [ |
|
{ |
|
name: "input_ids" |
|
data_type: TYPE_UINT32 |
|
dims: [ -1 ] |
|
|
|
}, |
|
{ |
|
name: "input_lengths" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
}, |
|
{ |
|
name: "request_output_len" |
|
data_type: TYPE_UINT32 |
|
dims: [ -1 ] |
|
}, |
|
# Everything from here to the end of the list is optional.
{ |
|
name: "input_embeddings" |
|
data_type: TYPE_INT8 |
|
dims: [ -1 ] |
|
optional: true |
|
}, |
|
{ |
|
name: "input_embedding_ranges" |
|
data_type: TYPE_UINT32 |
|
dims: [ -1, 2 ] |
|
optional: true |
|
}, |
|
{ |
|
name: "step" |
|
data_type: TYPE_INT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "session_len" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "runtime_top_k" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "runtime_top_p" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "beam_search_diversity_rate" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "temperature" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "len_penalty" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "repetition_penalty" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "random_seed" |
|
data_type: TYPE_UINT64 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "is_return_log_probs" |
|
data_type: TYPE_BOOL |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "beam_width" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "start_id" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "end_id" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "bad_words_list" |
|
data_type: TYPE_INT32 |
|
dims: [ 2, -1 ] |
|
optional: true |
|
}, |
|
{ |
|
name: "stop_words_list" |
|
data_type: TYPE_INT32 |
|
dims: [ 2, -1 ] |
|
optional: true |
|
}, |
|
{ |
|
name: "prompt_learning_task_name_ids" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "top_p_decay" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "top_p_min" |
|
data_type: TYPE_FP32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "top_p_reset_ids" |
|
data_type: TYPE_UINT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
# NOTE(review): START / END / STOP / CORRID look like sequence-control
# inputs (session start, end, cancel, correlation id) - confirm against
# the backend before relying on that reading.
{ |
|
name: "START" |
|
data_type: TYPE_INT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "END" |
|
data_type: TYPE_INT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "STOP" |
|
data_type: TYPE_INT32 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
}, |
|
{ |
|
name: "CORRID" |
|
data_type: TYPE_UINT64 |
|
dims: [ 1 ] |
|
reshape: { shape: [ ] } |
|
optional: true |
|
} |
|
] |
|
# Output tensor declarations. Every dimension is variable (-1):
# output_ids and output_log_probs are declared with two dimensions,
# sequence_length and cum_log_probs with one.
output [ |
|
{ |
|
name: "output_ids" |
|
data_type: TYPE_UINT32 |
|
dims: [ -1, -1 ] |
|
}, |
|
{ |
|
name: "sequence_length" |
|
data_type: TYPE_UINT32 |
|
dims: [ -1 ] |
|
}, |
|
{ |
|
name: "cum_log_probs" |
|
data_type: TYPE_FP32 |
|
dims: [ -1 ] |
|
}, |
|
{ |
|
name: "output_log_probs" |
|
data_type: TYPE_FP32 |
|
dims: [ -1, -1 ] |
|
} |
|
] |
|
|
|
# Backend parameter "pipeline_para_size", passed as the string "1".
# NOTE(review): presumably the pipeline-parallel degree - confirm.
parameters { |
|
key: "pipeline_para_size" |
|
value: { |
|
string_value: "1" |
|
} |
|
} |
|
# Backend parameter "data_type", passed as the string "fp16".
# NOTE(review): presumably the weight/compute precision - confirm.
parameters { |
|
key: "data_type" |
|
value: { |
|
string_value: "fp16" |
|
} |
|
} |
|
# Backend parameter "model_type", passed as the string "Llama".
parameters { |
|
key: "model_type" |
|
value: { |
|
string_value: "Llama" |
|
} |
|
} |
|
|
|
# Backend parameter "enable_custom_all_reduce", passed as the string "0".
# NOTE(review): "0" presumably means disabled - confirm how the backend
# parses this value.
parameters { |
|
key: "enable_custom_all_reduce" |
|
value: { |
|
string_value: "0" |
|
} |
|
} |
|
# Backend parameter "tensor_para_size", passed as the string "1".
# NOTE(review): presumably the tensor-parallel degree - confirm.
parameters { |
|
key: "tensor_para_size" |
|
value: { |
|
string_value: "1" |
|
} |
|
} |
|
# Backend parameter "model_name", passed as the string "internlm2-chat-7b".
# NOTE(review): the "model_type" parameter in this file is "Llama" while
# this name refers to internlm2 - confirm the pairing is intended.
parameters { |
|
key: "model_name" |
|
value: { |
|
string_value: "internlm2-chat-7b" |
|
} |
|
} |
|
|