"""Usage examples for the ``casehold/custom-legalbert`` model (Hugging Face).

Shows four ways to use the model: a fill-mask pipeline, direct model
loading, the hosted Inference API, and a quick Gradio demo.
"""

# --- Option 1: use a pipeline as a high-level helper ---
from transformers import pipeline

pipe = pipeline("fill-mask", model="casehold/custom-legalbert")

# --- Option 2: load the model directly ---
from transformers import AutoModel

model = AutoModel.from_pretrained("casehold/custom-legalbert")

# --- Option 3: query the hosted Inference API ---
import os

import requests

API_URL = "https://api-inference.huggingface.co/models/casehold/custom-legalbert"
# SECURITY: never hard-code API tokens in source. The original snippet embedded
# a live ``hf_...`` token, which must be revoked; read the token from the
# environment instead (export HF_TOKEN=... before running).
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}


def query(payload):
    """POST ``payload`` to the Inference API and return the decoded JSON.

    ``payload`` is a JSON-serializable dict, e.g. ``{"inputs": "..."}``.
    Network/HTTP errors propagate as ``requests`` exceptions.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


output = query({
    "inputs": "The answer to the universe is [MASK].",
})

# --- Option 4: launch a quick Gradio demo backed by the hosted model ---
import gradio as gr

gr.Interface.load("models/casehold/custom-legalbert").launch()