Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| import torch | |
| from transformers import T5ForConditionalGeneration, T5Tokenizer | |
# FastAPI application object for this service.
app = FastAPI()

# Module-level cache for the tokenizer and the model. Both start out as None
# and are filled in by load_model() on first use, not at import time.
tokenizer = None
model = None
def load_model():
    """Load the T5 tokenizer and model into the module-level cache.

    Does nothing when both ``model`` and ``tokenizer`` are already set.
    Otherwise the tokenizer is read from ``./tokenizer12`` and the model from
    ``./model``; the model is moved to CUDA when ``torch.cuda.is_available()``
    reports a device and to the CPU otherwise.

    The globals are assigned only after every step has succeeded, so a
    failure part-way through (for example while moving the model to the
    device) leaves the cache as it was and the next call retries the load.
    """
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build into locals first: nothing half-initialised is ever visible
    # through the globals that request handlers read.
    loaded_tokenizer = T5Tokenizer.from_pretrained('./tokenizer12')
    loaded_model = T5ForConditionalGeneration.from_pretrained('./model')
    loaded_model.to(device)

    tokenizer = loaded_tokenizer
    model = loaded_model
class QuestionRequest(BaseModel):
    """Request body for question generation, validated by Pydantic."""

    # Text placed after "context:" in the prompt built by generate_question.
    context: str
    # Text placed after "answer:" in the prompt built by generate_question.
    answer: str
| from fastapi import Query | |
async def generate_question(request: QuestionRequest):
    """Generate questions from a context/answer pair with the T5 model.

    Args:
        request: Request body carrying the ``context`` and ``answer``
            strings that are combined into the model prompt.

    Returns:
        A dict with a single key, ``"generated_questions"``, holding the
        decoded text of each sequence returned by beam search (three
        sequences from five beams, each at most 72 tokens long).
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # file; confirm that it is registered with ``app``.
    load_model()

    # Take the device from the model itself instead of re-deriving it, so
    # the inputs always land on the same device as the weights.
    device = model.device

    input_text = f"context: {request.context} answer: {request.answer}"

    # Calling the tokenizer directly replaces the deprecated encode_plus();
    # the arguments are unchanged.
    encoding = tokenizer(
        input_text,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    model.eval()
    with torch.no_grad():
        beam_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=72,
            early_stopping=True,
            num_beams=5,
            num_return_sequences=3,
        )

    generated_questions = [
        tokenizer.decode(
            output,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for output in beam_outputs
    ]
    return {"generated_questions": generated_questions}
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a
    # script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)