"""FastAPI service that summarizes raw text and uploaded PDF/DOCX documents."""

import io

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import torch
from transformers import pipeline
import PyPDF2
from docx import Document

app = FastAPI(
    title="Text Summarization API",
    description=(
        "API for summarizing text and documents using "
        "Falcon's text summarization model"
    ),
)

# Configure CORS
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive; confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the summarization pipeline once at import time so every request
# reuses the same loaded model.
device = "cuda" if torch.cuda.is_available() else "cpu"
summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)


def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        file_bytes: Raw bytes of the PDF document.

    Returns:
        The extracted text of all pages, joined in page order with no
        separator between pages.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    # `or ""` tolerates a page for which extract_text() yields nothing
    # (None or an empty string) instead of failing on the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_docx(file_bytes: bytes) -> str:
    """Return the text of every paragraph in a DOCX file.

    Args:
        file_bytes: Raw bytes of the DOCX document.

    Returns:
        The paragraph texts in document order, each followed by a newline.
    """
    doc = Document(io.BytesIO(file_bytes))
    return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)


def _summarize(text: str) -> dict:
    """Run the summarization pipeline on *text* and build the response body."""
    # NOTE(review): this call is synchronous and runs inside async handlers,
    # so it occupies the event loop for the duration of the inference.
    summary = summarization_pipe(text)
    return {"summary": summary[0]["summary_text"]}


@app.post("/summarize/text")
async def summarize_text(text: str = Form(...)):
    """Summarize text input.

    Args:
        text: The text to summarize, sent as a form field.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when the
        submitted text is empty or contains only whitespace.
    """
    # Whitespace-only input carries nothing to summarize; reject it up front.
    if not text.strip():
        return {"error": "Please provide text to summarize"}

    return _summarize(text)


@app.post("/summarize/file")
async def summarize_file(file: UploadFile = File(...)):
    """Summarize text from a PDF or DOCX file.

    Args:
        file: The uploaded document; the format is chosen from the filename
            extension (``.pdf`` or ``.docx``, case-insensitive).

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when the format
        is unsupported, no text could be extracted, or processing failed.
    """
    contents = await file.read()
    # The filename can be missing on an upload; an empty name falls through
    # to the "unsupported format" branch instead of raising AttributeError.
    file_name = (file.filename or "").lower()

    try:
        if file_name.endswith(".pdf"):
            text = extract_text_from_pdf(contents)
        elif file_name.endswith(".docx"):
            text = extract_text_from_docx(contents)
        else:
            return {
                "error": "Unsupported file format. Please upload a PDF or DOCX file."
            }

        # A document can extract to only newlines/spaces (e.g. a DOCX made of
        # empty paragraphs), which is as unusable as no text at all.
        if not text.strip():
            return {"error": "Could not extract text from the file"}

        return _summarize(text)
    except Exception as e:
        # Request boundary: report parsing/model failures to the client in the
        # same JSON error shape used by the other branches.
        return {"error": f"Error processing file: {str(e)}"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)