from fastapi import FastAPI, Form from fastapi.responses import HTMLResponse from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline import torch app = FastAPI() MODEL_ID = "ibm-granite/granite-4.0-tiny-preview" # Load tokenizer and model tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.float16 if torch.cuda.is_available() else "auto", device_map="auto" ) # Use pipeline for easier text generation (no device arg when using device_map="auto") pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) @app.get("/", response_class=HTMLResponse) def index(): return """
{summary}Back"