# Example: send a templated prompt to a Hugging Face text-generation endpoint
# and print the generated text.
from huggingface_hub import InferenceClient

# Placeholder value; replace with the URL of the endpoint to query.
endpoint_url = "https://your-endpoint-url-here"

# The user's request, substituted into the template below.
prompt = "Tell me about AI"

# System instruction followed by the USER / ASSISTANT markers around the
# request.
# NOTE(review): the template text is reproduced exactly as found; the source
# had been flattened onto one line, so any line breaks that originally
# separated the "###" sections may be missing -- confirm against the model's
# documented prompt format.
prompt_template = (
    "You are a helpful assistant for fiction writing. "
    "Always cut the bullshit and provide concise outlines with useful details. "
    "Do not turn your stories into fairy tales, be realistic. "
    f"### USER: {prompt} ### ASSISTANT: "
)

client = InferenceClient(endpoint_url)

# Send the full template, not the bare prompt: previously the template was
# built but never used, so the system instruction and role markers never
# reached the model.
response = client.text_generation(
    prompt_template,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

print(f"Model output: {response}")