Spaces:
Runtime error
Runtime error
File size: 1,002 Bytes
f9158ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# Azure ML managed online deployment (see $schema) that serves the
# `bloom-safetensors` model from a text-generation-inference container.
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference

# Registered model (name:version) and the path it is mounted at in the container.
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model

# Environment variables passed to the container.
environment_variables:
  # Points inside model_mount_path, at the mounted model's directory.
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  # Quoted so the value stays a string instead of being typed as an int.
  # NOTE(review): presumably one shard per GPU on the instance_type below —
  # confirm before changing either value.
  NUM_SHARD: "8"

# Custom container image and the HTTP routes the platform calls on it.
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    # Liveness and readiness share the same /health route; all routes use port 80.
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    scoring_route:
      port: 80
      path: /generate

instance_type: Standard_ND96amsr_A100_v4

request_settings:
  request_timeout_ms: 90000
  max_concurrent_requests_per_instance: 256

# Both probes use identical settings.
# NOTE(review): durations are presumably in seconds per the Azure ML schema —
# confirm against the schema reference.
liveness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

readiness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

instance_count: 1
|