"""FastAPI backend for a NotebookLM-style PDF question-answering service.

Keeps one in-memory FAISS index per notebook, embeds PDF pages with the
OpenAI embeddings API on upload, and answers questions with GPT-4 using
the top retrieved pages as context.
"""

from fastapi import FastAPI, UploadFile, File, Form
from pydantic import BaseModel
import openai
import faiss
import numpy as np
import os
from dotenv import load_dotenv
from fastapi.middleware.cors import CORSMiddleware
from pypdf import PdfReader

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

app = FastAPI()

# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True per the CORS spec; fine for local dev, but the
# origin list should be tightened for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# In-memory store: notebook_id -> {"index": faiss.IndexFlatL2,
# "texts": [page text, ...], "citations": ["file, page N", ...]}.
# Entries i in "texts"/"citations" correspond to vector i in "index".
# Not persistent and not shared across worker processes.
notebooks = {}

EMBEDDING_MODEL = "text-embedding-ada-002"
EMBEDDING_DIM = 1536  # ada-002 output size; must match the FAISS index width
TOP_K = 3  # maximum number of context chunks retrieved per question


class Query(BaseModel):
    """Request body for POST /ask."""
    question: str
    notebook_id: str


def _embed(text: str) -> list:
    """Return the ada-002 embedding vector for *text* (one API call)."""
    return openai.Embedding.create(
        input=[text], model=EMBEDDING_MODEL
    )["data"][0]["embedding"]


@app.get("/")
def read_root():
    """Health/info endpoint listing the available routes."""
    return {
        "message": "✅ NotebookLM OpenAI Backend is running!",
        "endpoints": {
            "/upload-pdf": "POST a PDF file with notebook_id",
            "/ask": "POST question + notebook_id to get answer"
        }
    }


@app.post("/ask")
def ask(query: Query):
    """Answer *query.question* from the documents indexed in the notebook.

    Returns {"answer": ..., "citations": [...]} on success, or a plain
    explanatory "answer" when the notebook is missing or empty.
    """
    nb = notebooks.get(query.notebook_id)
    if not nb:
        return {"answer": "Notebook not found."}

    # Check emptiness *before* spending an embeddings API call.
    if not nb["texts"]:
        return {"answer": "No documents indexed in this notebook."}

    question_embedding = _embed(query.question)

    # Never ask FAISS for more neighbours than there are vectors: it pads
    # missing results with index -1, which would silently index texts[-1]
    # (the last page) and attach the wrong citation.
    k = min(TOP_K, len(nb["texts"]))
    D, I = nb["index"].search(np.array([question_embedding]).astype("float32"), k=k)
    hits = [i for i in I[0] if i >= 0]

    # Label chunks by rank ([1], [2], ...) so the labels line up with the
    # system prompt's citation instruction and the returned citations list.
    context = "\n\n".join(
        f"[{rank}] {nb['texts'][i]}" for rank, i in enumerate(hits, start=1)
    )
    citation_refs = [nb["citations"][i] for i in hits]

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant that answers based on uploaded documents. Cite sources using [1], [2], etc.",
            },
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query.question}"},
        ],
        temperature=0.3,
    )
    return {
        "answer": response.choices[0].message.content.strip(),
        "citations": citation_refs,
    }


@app.post("/upload-pdf")
def upload_pdf(notebook_id: str = Form(...), file: UploadFile = File(...)):
    """Parse a PDF, embed each non-empty page, and add it to the notebook.

    Creates the notebook on first upload. Pages with no extractable text
    (e.g. scanned images) are skipped.
    """
    if notebook_id not in notebooks:
        notebooks[notebook_id] = {
            "index": faiss.IndexFlatL2(EMBEDDING_DIM),
            "texts": [],
            "citations": [],
        }
    nb = notebooks[notebook_id]

    reader = PdfReader(file.file)
    for page_num, page in enumerate(reader.pages, start=1):
        content = page.extract_text()
        if not content:
            continue  # skip image-only / empty pages
        embedding = _embed(content)
        nb["index"].add(np.array([embedding]).astype("float32"))
        nb["texts"].append(content)
        nb["citations"].append(f"{file.filename}, page {page_num}")

    return {"status": f"{file.filename} uploaded and parsed"}