"""Deploy a custom Docker image as a Hugging Face Inference Endpoint and smoke-test it.

Requires a valid Hugging Face token with Inference Endpoints access; every step
below performs a network call against the HF Endpoints service.
"""

from huggingface_hub import InferenceClient, create_inference_endpoint

# Container settings for the "custom" framework: the service probes
# health_route on the given port of the supplied Docker image.
image_config = {
    "health_route": "/healthz",
    "port": 8080,
    "url": "your-docker-image-url:latest",
}

# Provision the endpoint on AWS us-east-1 with a medium CPU instance.
endpoint = create_inference_endpoint(
    name="my-custom-endpoint",
    repository="path/to/your/model/repository",
    framework="custom",
    task="text-classification",
    accelerator="cpu",  # or "gpu" if needed
    instance_size="medium",
    instance_type="c6i",
    region="us-east-1",
    custom_image=image_config,
)

# Block until the endpoint reports it is up and serving.
endpoint.wait()

# Point the high-level client at the freshly created endpoint URL
# and run a single inference request as a smoke test.
client = InferenceClient(endpoint.url)
result = client.text_classification("This is a test input")
print(result)