"""FastAPI question-answering service over pre-built vector stores.

Tries free LLM providers in order of preference (Hugging Face Inference
API, Cohere, Together AI) and serves a RetrievalQA chain over them.
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from ..utils.vector_store import get_vector_store
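# NOTE: the langchain.llms import paths above match pre-0.1 LangChain
# releases; on 0.1+ these integrations live under langchain_community.llms.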

app = FastAPI()


class QueryRequest(BaseModel):
    query: str

# Option 1: Hugging Face Inference API (Free with rate limits)


def setup_hf_api_model():
    """Uses Hugging Face's free Inference API"""
    # Get free token from https://huggingface.co/settings/tokens
    # Set this in your deployment
    hf_token = os.getenv("HUGGINGFACE_API_TOKEN")

    if not hf_token:
        raise ValueError("HUGGINGFACE_API_TOKEN environment variable required")

    llm = HuggingFaceHub(
        repo_id="microsoft/DialoGPT-medium",  # Free model
        model_kwargs={
            "temperature": 0.1,
            "max_length": 512
        },
        huggingfacehub_api_token=hf_token
    )
    return llm

# Option 2: Cohere API (Free tier: 100 API calls/month)


def setup_cohere_model():
    """Uses Cohere's free tier"""
    from langchain.llms import Cohere

    cohere_api_key = os.getenv("COHERE_API_KEY")
    if not cohere_api_key:
        raise ValueError("COHERE_API_KEY required")

    llm = Cohere(
        cohere_api_key=cohere_api_key,
        model="command-light",  # Free tier model
        temperature=0.1
    )
    return llm

# Option 3: Together AI (Free credits)


def setup_together_model():
    """Uses Together AI's free credits"""
    from langchain.llms import Together

    together_api_key = os.getenv("TOGETHER_API_KEY")
    if not together_api_key:
        raise ValueError("TOGETHER_API_KEY required")

    llm = Together(
        together_api_key=together_api_key,
        model="meta-llama/Llama-2-7b-chat-hf",
        temperature=0.1
    )
    return llm


# Initialize model (try different options in order of preference)
llm = None
model_used = "none"

try:
    llm = setup_hf_api_model()
    model_used = "huggingface"
    print("βœ… Using Hugging Face Inference API")
except Exception:  # catch Exception, not a bare except, so Ctrl-C still works
    try:
        llm = setup_cohere_model()
        model_used = "cohere"
        print("βœ… Using Cohere API")
    except Exception:
        try:
            llm = setup_together_model()
            model_used = "together"
            print("βœ… Using Together AI")
        except Exception as e:
            print(f"❌ Failed to initialize any model: {e}")

# Setup QA chain
qa_chain = None
if llm:
    try:
        retriever = get_vector_store().as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True
        )
        print("βœ… QA chain ready")
    except Exception as e:
        print(f"❌ QA chain failed: {e}")


@app.get("/")
def root():
    return {
        "status": "running",
        "model": model_used,
        "qa_ready": qa_chain is not None
    }
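
# Example health check (hypothetical local run; values reflect whichever
# provider initialized successfully):
#   curl http://localhost:8000/
#   -> {"status": "running", "model": "huggingface", "qa_ready": true}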


@app.post("/ask")
def ask_question(request: QueryRequest):
    if qa_chain is None:
        raise HTTPException(status_code=500, detail="Service not ready")

    try:
        result = qa_chain({"query": request.query})

        return {
            "answer": result["result"],
            "model_used": model_used,
            "sources": [
                {
                    "content": doc.page_content[:200] + "...",
                    "metadata": doc.metadata
                }
                for doc in result["source_documents"][:3]
            ]
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
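
# Example request against /ask (hypothetical query; the answer and sources
# depend on what has been indexed into the vector store):
#   curl -X POST http://localhost:8000/ask \
#     -H "Content-Type: application/json" \
#     -d '{"query": "What does the MES track?"}'
# Response shape: {"answer": "...", "model_used": "...", "sources": [...]}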


@app.post("/ask/{store_type}")
def ask_specific_store(store_type: str, request: QueryRequest):
    if llm is None:
        raise HTTPException(status_code=500, detail="LLM not available")

    store_paths = {
        "mes": "./vector_stores/mes_db",
        "general": "./vector_stores/general_db",
        "tech": "./vector_stores/tech_db"
    }

    if store_type not in store_paths:
        raise HTTPException(status_code=400, detail="Invalid store type")

    try:
        vector_store = get_vector_store(
            persist_directory=store_paths[store_type])
        retriever = vector_store.as_retriever()

        store_qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True
        )

        result = store_qa_chain({"query": request.query})

        return {
            "answer": result["result"],
            "store_used": store_type,
            "model_used": model_used,
            "sources": [doc.page_content[:200] + "..." for doc in result["source_documents"][:3]]
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
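
# Example per-store query (assumes ./vector_stores/tech_db was built by a
# separate indexing step):
#   curl -X POST http://localhost:8000/ask/tech \
#     -H "Content-Type: application/json" \
#     -d '{"query": "Which firmware versions are supported?"}'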


if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
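
# Quick start (a sketch; the module path is an assumption -- the relative
# import of get_vector_store means this file must run as part of its package):
#   export HUGGINGFACE_API_TOKEN=hf_...   # or COHERE_API_KEY / TOGETHER_API_KEY
#   python -m app.api.main                # hypothetical package path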