Spaces:
Runtime error
Runtime error
# Azure ML managed online deployment for BLOOM, served by the
# text-generation-inference container image referenced below.
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference

# Registered model (name:version) and the path it is mounted at in the container.
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model

# Environment variables passed to the serving container.
environment_variables:
  # Points at the mounted model directory (model_mount_path + model name).
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  # NOTE(review): presumably one shard per GPU on the instance type below — confirm.
  NUM_SHARD: 8

# Custom container image plus the HTTP routes Azure ML uses to reach it.
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    scoring_route:
      port: 80
      path: /generate

instance_type: Standard_ND96amsr_A100_v4

request_settings:
  request_timeout_ms: 90000
  max_concurrent_requests_per_instance: 256

# Liveness and readiness probes share identical settings.
# NOTE(review): the long initial_delay presumably allows for model load time — confirm.
liveness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5
readiness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

instance_count: 1