File size: 1,725 Bytes
271461a 4f6b66b fa4959b 5427b4e 4f6b66b 2d9d105 271461a fa4959b 5427b4e 2d9d105 4f6b66b fa4959b 271461a fa4959b 271461a 5427b4e 271461a 5427b4e 271461a fa4959b 271461a 4f6b66b 271461a 4f6b66b 271461a 4f6b66b 5427b4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import html

import torch
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Application object that the route decorators in this file register against.
app = FastAPI()

# Model identifier handed to both from_pretrained() calls below.
MODEL_ID = "ibm-granite/granite-4.0-tiny-preview"

# Keyword options for loading the model: half precision when CUDA is
# available, otherwise let transformers choose the dtype ("auto").
_load_options = {
    "torch_dtype": torch.float16 if torch.cuda.is_available() else "auto",
    "device_map": "auto",
}

# Tokenizer first, then the model, both from the same identifier.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_load_options)

# Text-generation pipeline wrapping the loaded objects. No device argument
# is passed because the model was loaded with device_map="auto".
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the landing page: a form that posts a ``text`` field to /summarize."""
    # Adjacent literals are concatenated at compile time; each piece is one
    # line of the page, including the leading newline of the document.
    page = (
        "\n"
        "<html>\n"
        "<head><title>Granite Tiny Summarizer</title></head>\n"
        "<body>\n"
        "<h1>Granite 4.0 Tiny Summarization Demo</h1>\n"
        '<form action="/summarize" method="post">\n'
        '<textarea name="text" rows="10" cols="80" placeholder="Paste text to summarize"></textarea><br>\n'
        '<button type="submit">Summarize</button>\n'
        "</form>\n"
        "</body>\n"
        "</html>\n"
    )
    return page
@app.post("/summarize", response_class=HTMLResponse)
def summarize(text: str = Form(...)):
    """Summarize the submitted passage and render the result as HTML.

    Builds a prompt around the passage, samples a completion from the
    module-level text-generation pipeline ``pipe``, and returns it inside a
    small HTML fragment.

    Args:
        text: Raw passage taken from the form's ``text`` field.

    Returns:
        An HTML string holding the summary (or a notice when the passage is
        blank) followed by a link back to the form.
    """
    passage = text.strip()
    if not passage:
        # Nothing to summarize: skip generation rather than sampling a
        # "summary" of an empty passage.
        return "<h2>Summary</h2><pre>No text was provided.<br></pre><a href='/'>Back</a>"

    prompt = (
        "Below is a passage of text. Please provide a concise summary in 2-4 sentences.\n\n"
        f"Text:\n{passage}\n\nSummary:"
    )
    outputs = pipe(
        prompt,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
        # Reuse EOS as the padding token id during generation.
        pad_token_id=tokenizer.eos_token_id,
        # Ask for the completion only. Splitting the full text on "Summary:"
        # breaks when the passage or the completion contains that marker.
        return_full_text=False,
    )
    summary = outputs[0]["generated_text"].strip()

    # The completion can echo user-supplied text, so escape it before it is
    # placed into the HTML response.
    return f"<h2>Summary</h2><pre>{html.escape(summary)}<br></pre><a href='/'>Back</a>"