gptj-gui / deploy-gptj.py
alertFunction's picture
Create deploy-gptj.py
7e732e4
raw
history blame contribute delete
No virus
737 Bytes
from sagemaker.huggingface import HuggingFaceModel
import sagemaker
# Public S3 location of the packaged GPT-J model archive. The path itself
# names the framework versions it targets (transformers 4.12.3, pytorch 1.9.1).
model_uri = (
    "s3://huggingface-sagemaker-models/transformers/4.12.3/pytorch/1.9.1/gpt-j/model.tar.gz"
)
# Execution role used for the deployment; it must carry the permissions
# required to create an endpoint.
role = sagemaker.get_execution_role()
# Describe the model to SageMaker: where the archive lives, which role to
# run under, and which transformers / PyTorch / Python versions to use.
# The version pins mirror the ones that appear in the archive's S3 path.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data=model_uri,
    py_version="py38",
    pytorch_version="1.9.1",
    transformers_version="4.12.3",
)
# Deploy the model to SageMaker Inference under a fixed endpoint name; the
# object returned by `deploy` is kept as `predictor`.
predictor = huggingface_model.deploy(
    endpoint_name="sm-endpoint-gpt-j-6b",
    # EC2 instance type; 'ml.p3.2xlarge' is the alternative noted by the author.
    instance_type="ml.g4dn.xlarge",
    initial_instance_count=1,  # number of instances
)