# text-generation-inference/aml/deployment.yaml
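# Azure ML managed online deployment that serves BLOOM with the text-generation-inference container.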
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model
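# Settings read by the text-generation-inference server: WEIGHTS_CACHE_OVERRIDE points at the
# mounted safetensors weights, MODEL_ID selects the model, and NUM_SHARD sets the number of
# tensor-parallel shards (one per GPU).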
environment_variables:
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  NUM_SHARD: 8
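# Container image plus the routes Azure ML uses for health checks and scoring.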
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    scoring_route:
      port: 80
      path: /generate
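# ND96amsr_A100_v4 provides 8x A100 80GB GPUs, matching NUM_SHARD above.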
instance_type: Standard_ND96amsr_A100_v4
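# Generation requests can be long-running: allow 90 s per request and up to 256 concurrent requests per instance.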
request_settings:
  request_timeout_ms: 90000
  max_concurrent_requests_per_instance: 256
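# Generous probe delays give the server time to load and shard the weights before health checks begin.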
liveness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5
readiness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5
instance_count: 1