# Deploy the GPT-J model artifact to a SageMaker real-time inference endpoint
# using the Hugging Face model class from the SageMaker Python SDK.
from sagemaker.huggingface import HuggingFaceModel
import sagemaker

# IAM role with permissions to create the endpoint.
role = sagemaker.get_execution_role()

# Public S3 URI to the GPT-J model artifact.
model_uri = "s3://huggingface-sagemaker-models/transformers/4.12.3/pytorch/1.9.1/gpt-j/model.tar.gz"

# Create the Hugging Face model object. The framework versions below match
# the version segments embedded in the artifact path above.
huggingface_model = HuggingFaceModel(
    model_data=model_uri,
    transformers_version='4.12.3',
    pytorch_version='1.9.1',
    py_version='py38',
    role=role,
)

# Deploy the model to SageMaker Inference.
predictor = huggingface_model.deploy(
    initial_instance_count=1,  # number of instances
    # EC2 instance type; the original author noted 'ml.p3.2xlarge' as an
    # alternative.
    instance_type='ml.g4dn.xlarge',
    endpoint_name='sm-endpoint-gpt-j-6b',
)