Spaces:
Runtime error
Runtime error
| from loguru import logger | |
| try: | |
| from sagemaker.enums import EndpointType | |
| from sagemaker.huggingface import get_huggingface_llm_image_uri | |
| except ModuleNotFoundError: | |
| logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") | |
| from llm_engineering.model.utils import ResourceManager | |
| from llm_engineering.settings import settings | |
| from .config import hugging_face_deploy_config, model_resource_config | |
| from .sagemaker_huggingface import DeploymentService, SagemakerHuggingfaceStrategy | |
def create_endpoint(endpoint_type: "EndpointType | None" = None) -> None:
    """Create the SageMaker inference endpoint for the configured Hugging Face model.

    Checks that ``settings.AWS_ARN_ROLE`` is set, looks up the Hugging Face LLM
    container image URI (version ``2.2.0``), and hands the deployment
    configuration taken from ``settings`` and the local ``config`` module to
    ``SagemakerHuggingfaceStrategy.deploy``.

    Args:
        endpoint_type: Endpoint type forwarded to the deployment strategy.
            ``None`` (the default) resolves to
            ``EndpointType.INFERENCE_COMPONENT_BASED``.

    Raises:
        AssertionError: If ``settings.AWS_ARN_ROLE`` is ``None``.
        NameError: If the optional SageMaker imports at the top of the module
            failed, leaving ``EndpointType`` / ``get_huggingface_llm_image_uri``
            unbound.
    """
    # Raised explicitly rather than via ``assert`` so the check is not stripped
    # under ``python -O``; the exception type and message stay the same.
    if settings.AWS_ARN_ROLE is None:
        raise AssertionError("AWS_ARN_ROLE is not set in the .env file.")

    # Resolve the default at call time, not definition time. The SageMaker
    # import is guarded by try/except, so naming ``EndpointType`` in the
    # signature would make this module fail to import with NameError whenever
    # the optional AWS dependencies are missing.
    if endpoint_type is None:
        endpoint_type = EndpointType.INFERENCE_COMPONENT_BASED

    logger.info(f"Creating endpoint with endpoint_type = {endpoint_type} and model_id = {settings.HF_MODEL_ID}")

    llm_image = get_huggingface_llm_image_uri("huggingface", version="2.2.0")

    resource_manager = ResourceManager()
    deployment_service = DeploymentService(resource_manager=resource_manager)

    SagemakerHuggingfaceStrategy(deployment_service).deploy(
        role_arn=settings.AWS_ARN_ROLE,
        llm_image=llm_image,
        config=hugging_face_deploy_config,
        endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE,
        endpoint_config_name=settings.SAGEMAKER_ENDPOINT_CONFIG_INFERENCE,
        gpu_instance_type=settings.GPU_INSTANCE_TYPE,
        resources=model_resource_config,
        endpoint_type=endpoint_type,
    )
if __name__ == "__main__":
    # Script entry point: run the deployment with the model-based endpoint
    # type instead of the function's default.
    script_endpoint_type = EndpointType.MODEL_BASED
    create_endpoint(endpoint_type=script_endpoint_type)