syntax = "proto3"; | |
package generate.v1; | |
service TextGenerationService { | |
/// Model Info | |
rpc Info (InfoRequest) returns (InfoResponse) {} | |
/// Service discovery | |
rpc ServiceDiscovery (ServiceDiscoveryRequest) returns (ServiceDiscoveryResponse) {} | |
/// Empties batch cache | |
rpc ClearCache (ClearCacheRequest) returns (ClearCacheResponse); | |
/// Remove requests from a cached batch | |
rpc FilterBatch (FilterBatchRequest) returns (FilterBatchResponse); | |
/// Prefill batch and decode first token | |
rpc Prefill (PrefillRequest) returns (PrefillResponse); | |
/// Decode token for a list of prefilled batches | |
rpc Decode (DecodeRequest) returns (DecodeResponse); | |
/// Health check | |
rpc Health (HealthRequest) returns (HealthResponse); | |
} | |
message HealthRequest {}
message HealthResponse {}

/// Empty request
message InfoRequest {}

message InfoResponse {
    bool requires_padding = 1;
    string dtype = 2;
    string device_type = 3;
}

/// Empty request
message ServiceDiscoveryRequest {}

message ServiceDiscoveryResponse {
    /// Other shard URLs
    repeated string urls = 1;
}

message ClearCacheRequest {
    /// Optional batch id
    optional uint64 id = 1;
}

/// Empty response
message ClearCacheResponse {}

message NextTokenChooserParameters {
    /// exponential scaling of the output probability distribution
    float temperature = 1;
    /// restricting to the k highest probability tokens
    uint32 top_k = 2;
    /// restricting to the smallest set of tokens whose cumulative probability reaches top_p (nucleus sampling)
    float top_p = 3;
    /// restricting to the most typical tokens whose cumulative mass reaches typical_p (typical decoding)
    float typical_p = 4;
    /// apply sampling on the logits
    bool do_sample = 5;
    /// random seed for sampling
    uint64 seed = 6;
    /// repetition penalty
    float repetition_penalty = 7;
    /// token watermarking using "A Watermark for Large Language Models"
    bool watermark = 8;
}

message StoppingCriteriaParameters {
    /// Maximum number of generated tokens
    uint32 max_new_tokens = 1;
    /// Optional stopping sequences
    repeated string stop_sequences = 2;
    /// Ignore end of sequence token
    /// used for benchmarking
    bool ignore_eos_token = 3;
}

message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// Context truncation
    uint32 truncate = 3;
    /// Next Token Chooser Parameters
    NextTokenChooserParameters parameters = 4;
    /// Stopping Criteria Parameters
    StoppingCriteriaParameters stopping_parameters = 5;
}

message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
    /// Batch size (==len(requests))
    uint32 size = 3;
    /// Maximum number of tokens this batch will grow to
    uint32 max_tokens = 4;
}

enum FinishReason {
    FINISH_REASON_LENGTH = 0;
    FINISH_REASON_EOS_TOKEN = 1;
    FINISH_REASON_STOP_SEQUENCE = 2;
}

message GeneratedText {
    /// Output
    string text = 1;
    /// Number of generated tokens
    uint32 generated_tokens = 2;
    /// Finish reason
    FinishReason finish_reason = 3;
    /// Seed
    optional uint64 seed = 4;
}

message PrefillTokens {
    /// Prefill Token IDs
    repeated uint32 ids = 1;
    /// Prefill Logprobs
    repeated float logprobs = 2;
    /// Prefill tokens
    repeated string texts = 3;
}

message Generation {
    /// Request ID
    uint64 request_id = 1;
    /// Prefill tokens (optional)
    PrefillTokens prefill_tokens = 2;
    /// Token ID
    uint32 token_id = 3;
    /// Logprob
    float token_logprob = 4;
    /// Text
    string token_text = 5;
    /// Is it a special token
    bool token_is_special = 6;
    /// Complete generated text
    GeneratedText generated_text = 7;
}

message FilterBatchRequest {
    /// Batch ID
    uint64 batch_id = 1;
    /// Requests to keep
    repeated Request keep_requests = 2;
}

message FilterBatchResponse {
    /// Filtered Batch (cached)
    Batch batch = 1;
}

message PrefillRequest {
    /// Batch
    Batch batch = 1;
}

message PrefillResponse {
    /// Generation
    repeated Generation generations = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}

message DecodeRequest {
    /// Cached batches
    repeated Batch batches = 1;
}

message DecodeResponse {
    /// Decodes
    repeated Generation generations = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}
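
A minimal client sketch, not part of the protocol definition above: it assumes this file is compiled as generate.proto with grpcio-tools (producing generate_pb2 and generate_pb2_grpc) and that a shard listens on a Unix socket at /tmp/text-generation-server-0; the module names, socket path, and parameter values are illustrative assumptions rather than anything prescribed by the service. It follows the flow the RPC comments describe: Prefill runs the prompt and decodes the first token, then Decode advances the cached batch one token per call until no cached batch is returned.

# client_sketch.py -- illustrative only; module names and socket path are assumptions.
# Assumed codegen: python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. generate.proto
import grpc

import generate_pb2
import generate_pb2_grpc


def main() -> None:
    # Address is an assumption; use whatever endpoint the shard actually binds.
    channel = grpc.insecure_channel("unix:///tmp/text-generation-server-0")
    stub = generate_pb2_grpc.TextGenerationServiceStub(channel)

    # Model info and health check.
    info = stub.Info(generate_pb2.InfoRequest())
    print(f"dtype={info.dtype} device={info.device_type} padding={info.requires_padding}")
    stub.Health(generate_pb2.HealthRequest())

    # A single-request batch built from the Request/Batch messages above.
    request = generate_pb2.Request(
        id=0,
        inputs="The quick brown fox",
        truncate=1024,
        parameters=generate_pb2.NextTokenChooserParameters(
            temperature=1.0,
            top_k=0,
            top_p=1.0,
            typical_p=1.0,
            do_sample=False,
            seed=42,
            repetition_penalty=1.0,
            watermark=False,
        ),
        stopping_parameters=generate_pb2.StoppingCriteriaParameters(
            max_new_tokens=16,
            stop_sequences=[],
            ignore_eos_token=False,
        ),
    )
    batch = generate_pb2.Batch(id=0, requests=[request], size=1, max_tokens=1040)

    # Prefill decodes the first token and returns a cached batch; Decode keeps
    # advancing that cached batch until the server stops returning one.
    response = stub.Prefill(generate_pb2.PrefillRequest(batch=batch))
    while True:
        for generation in response.generations:
            if generation.HasField("generated_text"):
                print(generation.generated_text.text)
        if not response.HasField("batch"):
            break  # every request in the batch has finished
        response = stub.Decode(generate_pb2.DecodeRequest(batches=[response.batch]))


if __name__ == "__main__":
    main()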