# File size: 1,002 Bytes
# f9158ff
# Azure ML managed online deployment definition (see $schema) for the
# "bloom-deployment" deployment under the "bloom-inference" endpoint.
$schema: 'https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json'
name: bloom-deployment
endpoint_name: bloom-inference
# NOTE(review): reads as azureml:<model name>:<version> — confirm that
# version 1 of "bloom-safetensors" is the intended model asset.
model: 'azureml:bloom-safetensors:1'
# Path at which the model is mounted; the WEIGHTS_CACHE_OVERRIDE
# environment variable in this file points at a directory beneath it.
model_mount_path: /var/azureml-model
# Environment variables handed to the inference container.
environment_variables:
  # Sub-directory of model_mount_path (/var/azureml-model) holding the
  # bloom-safetensors weights.
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  # Quoted on purpose: environment variable values are strings, and an
  # unquoted 8 would be loaded as a YAML integer.
  # NOTE(review): presumably one shard per GPU of instance_type — confirm.
  NUM_SHARD: '8'
# Container image and the HTTP routes that the image serves.
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    # Liveness and readiness checks share the same /health route on port 80.
    liveness_route:
      path: /health
      port: 80
    readiness_route:
      path: /health
      port: 80
    # Scoring traffic goes to /generate on the same port.
    scoring_route:
      path: /generate
      port: 80
# Compute SKU used for each instance of this deployment.
instance_type: Standard_ND96amsr_A100_v4
# Per-request limits for the deployment.
request_settings:
  # Timeout in milliseconds (90000 ms = 90 s).
  request_timeout_ms: 90000
  # Cap on requests handled concurrently by a single instance.
  max_concurrent_requests_per_instance: 256
# Liveness probe: timing settings first, then pass/fail thresholds.
# NOTE(review): durations are presumably in seconds (600 s initial delay
# would allow time for model loading) — confirm against the schema.
liveness_probe:
  initial_delay: 600
  period: 120
  timeout: 90
  failure_threshold: 5
  success_threshold: 1
# Readiness probe: timing settings first, then pass/fail thresholds.
# NOTE(review): durations are presumably in seconds — confirm against
# the schema.
readiness_probe:
  initial_delay: 600
  period: 120
  timeout: 90
  failure_threshold: 5
  success_threshold: 1
# Number of instances to run for this deployment.
instance_count: 1