import os
import zipfile

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

app = FastAPI()

# === Globals (populated on startup) ===
llm = None
embeddings = None
vectorstore = None
retriever = None
chain = None


class QueryRequest(BaseModel):
    question: str


@app.on_event("startup")
def load_components():
    global llm, embeddings, vectorstore, retriever, chain

    # 1) Init LLM & embeddings
    llm = ChatGroq(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0,
        max_tokens=1024,
        api_key=os.getenv("API_KEY"),
    )
    embeddings = HuggingFaceEmbeddings(
        model_name="intfloat/multilingual-e5-large",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )

    # 2) Unzip & load both FAISS vectorstores
    # First index
    zip1 = "faiss_index.zip"
    dir1 = "faiss_index"
    if not os.path.exists(dir1):
        with zipfile.ZipFile(zip1, "r") as z:
            z.extractall(dir1)
        print("✅ Unzipped FAISS index 1.")
    vs1 = FAISS.load_local(dir1, embeddings, allow_dangerous_deserialization=True)
    print("✅ FAISS index 1 loaded.")

    # Second index
    zip2 = "faiss_index(1).zip"
    dir2 = "faiss_index_extra"
    if not os.path.exists(dir2):
        with zipfile.ZipFile(zip2, "r") as z:
            z.extractall(dir2)
        print("✅ Unzipped FAISS index 2.")
    vs2 = FAISS.load_local(dir2, embeddings, allow_dangerous_deserialization=True)
    print("✅ FAISS index 2 loaded.")

    # 3) Merge the second index into the first
    vs1.merge_from(vs2)
    vectorstore = vs1
    print("✅ Merged FAISS indexes into a single vectorstore.")

    # 4) Create retriever & QA chain
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    # Note: "لا أعلم" in the prompt below is Arabic for "I don't know".
    prompt = PromptTemplate(
        template="""
You are an expert assistant on Islamic knowledge.
Use **only** the information in the "Retrieved context" to answer the user's question.
Do **not** add any outside information, personal opinions, or conjecture. If the answer
is not contained in the context, reply with "لا أعلم".
Be concise, accurate, and directly address the user's question.

Retrieved context:
{context}

User's question:
{question}

Your response:
""",
        input_variables=["context", "question"],
    )
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )
    print("✅ QA chain ready.")


@app.get("/")
def root():
    return {"message": "Arabic Hadith Finder API is up and running!"}


@app.post("/query")
def query(request: QueryRequest):
    try:
        result = chain.invoke({"query": request.question})
        return {"answer": result["result"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
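
# --- Optional local entrypoint (a minimal sketch, not part of the original app) ---
# Assumes uvicorn is installed (pip install uvicorn). In deployment you would
# typically run the equivalent CLI instead:
#   uvicorn main:app --host 0.0.0.0 --port 8000
# where "main" is this file's module name (an assumption; adjust to the actual filename).
# Example query once the server is up:
#   curl -X POST http://localhost:8000/query \
#        -H "Content-Type: application/json" \
#        -d '{"question": "..."}'
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app defined above; host and port values are illustrative.
    uvicorn.run(app, host="0.0.0.0", port=8000)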