# mes-chatbot-rag-backend/api/main_hface_hosted.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os

from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

from ..utils.vector_store import get_vector_store

app = FastAPI()
class QueryRequest(BaseModel):
    query: str
# Option 1: Hugging Face Inference API (free, with rate limits)
def setup_hf_api_model():
    """Uses Hugging Face's free Inference API."""
    # Get a free token from https://huggingface.co/settings/tokens
    # and set it in your deployment environment.
    hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not hf_token:
        raise ValueError("HUGGINGFACE_API_TOKEN environment variable required")

    llm = HuggingFaceHub(
        repo_id="microsoft/DialoGPT-medium",  # free model
        model_kwargs={
            "temperature": 0.1,
            "max_length": 512,
        },
        huggingfacehub_api_token=hf_token,
    )
    return llm
# Option 2: Cohere API (free tier: 100 API calls/month)
def setup_cohere_model():
    """Uses Cohere's free tier."""
    from langchain.llms import Cohere

    cohere_api_key = os.getenv("COHERE_API_KEY")
    if not cohere_api_key:
        raise ValueError("COHERE_API_KEY required")

    llm = Cohere(
        cohere_api_key=cohere_api_key,
        model="command-light",  # free tier model
        temperature=0.1,
    )
    return llm
# Option 3: Together AI (free credits)
def setup_together_model():
    """Uses Together AI's free credits."""
    from langchain.llms import Together

    together_api_key = os.getenv("TOGETHER_API_KEY")
    if not together_api_key:
        raise ValueError("TOGETHER_API_KEY required")

    llm = Together(
        together_api_key=together_api_key,
        model="meta-llama/Llama-2-7b-chat-hf",
        temperature=0.1,
    )
    return llm
# Initialize a model (try the options in order of preference)
llm = None
model_used = "none"

try:
    llm = setup_hf_api_model()
    model_used = "huggingface"
    print("✅ Using Hugging Face Inference API")
except Exception:
    try:
        llm = setup_cohere_model()
        model_used = "cohere"
        print("✅ Using Cohere API")
    except Exception:
        try:
            llm = setup_together_model()
            model_used = "together"
            print("✅ Using Together AI")
        except Exception as e:
            print(f"❌ Failed to initialize any model: {e}")
# Set up the QA chain
qa_chain = None
if llm:
    try:
        retriever = get_vector_store().as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
        )
        print("✅ QA chain ready")
    except Exception as e:
        print(f"❌ QA chain failed: {e}")
@app.get("/")
def root():
    return {
        "status": "running",
        "model": model_used,
        "qa_ready": qa_chain is not None,
    }
@app.post("/ask")
def ask_question(request: QueryRequest):
if qa_chain is None:
raise HTTPException(status_code=500, detail="Service not ready")
try:
result = qa_chain({"query": request.query})
return {
"answer": result["result"],
"model_used": model_used,
"sources": [
{
"content": doc.page_content[:200] + "...",
"metadata": doc.metadata
}
for doc in result["source_documents"][:3]
]
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
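
# A minimal usage sketch for /ask (assumptions: the service is running locally
# on port 8000, as configured in the __main__ block at the bottom of this file,
# and the example query text is purely illustrative):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       json={"query": "What does the MES module do?"},
#       timeout=60,
#   )
#   data = resp.json()  # {"answer": ..., "model_used": ..., "sources": [...]}
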
@app.post("/ask/{store_type}")
def ask_specific_store(store_type: str, request: QueryRequest):
if llm is None:
raise HTTPException(status_code=500, detail="LLM not available")
store_paths = {
"mes": "./vector_stores/mes_db",
"general": "./vector_stores/general_db",
"tech": "./vector_stores/tech_db"
}
if store_type not in store_paths:
raise HTTPException(status_code=400, detail="Invalid store type")
try:
vector_store = get_vector_store(
persist_directory=store_paths[store_type])
retriever = vector_store.as_retriever()
store_qa_chain = RetrievalQA.from_chain_type(
llm=llm,
retriever=retriever,
return_source_documents=True
)
result = store_qa_chain({"query": request.query})
return {
"answer": result["result"],
"store_used": store_type,
"model_used": model_used,
"sources": [doc.page_content[:200] + "..." for doc in result["source_documents"][:3]]
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn

    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
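
# A minimal usage sketch for the store-specific endpoint (assumptions: the
# server is reachable at http://localhost:8000, a "mes" store has already been
# built under ./vector_stores/mes_db, and the query text is illustrative):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/ask/mes",
#       json={"query": "How are work orders tracked?"},
#       timeout=60,
#   )
#   print(resp.json()["answer"], resp.json()["store_used"])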